aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/Kbuild4
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/amd_nb.h23
-rw-r--r--arch/x86/include/asm/apic.h5
-rw-r--r--arch/x86/include/asm/asm-prototypes.h16
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/cpufeatures.h12
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h39
-rw-r--r--arch/x86/include/asm/e820.h12
-rw-r--r--arch/x86/include/asm/efi.h16
-rw-r--r--arch/x86/include/asm/floppy.h20
-rw-r--r--arch/x86/include/asm/fpu/api.h10
-rw-r--r--arch/x86/include/asm/fpu/internal.h139
-rw-r--r--arch/x86/include/asm/fpu/types.h34
-rw-r--r--arch/x86/include/asm/fpu/xstate.h17
-rw-r--r--arch/x86/include/asm/idle.h22
-rw-r--r--arch/x86/include/asm/intel_rdt.h224
-rw-r--r--arch/x86/include/asm/intel_rdt_common.h27
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h19
-rw-r--r--arch/x86/include/asm/kvm_page_track.h14
-rw-r--r--arch/x86/include/asm/lguest_hcall.h1
-rw-r--r--arch/x86/include/asm/mce.h34
-rw-r--r--arch/x86/include/asm/microcode.h18
-rw-r--r--arch/x86/include/asm/microcode_amd.h30
-rw-r--r--arch/x86/include/asm/microcode_intel.h4
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h2
-rw-r--r--arch/x86/include/asm/mpx.h4
-rw-r--r--arch/x86/include/asm/msr-index.h4
-rw-r--r--arch/x86/include/asm/msr.h46
-rw-r--r--arch/x86/include/asm/mutex.h5
-rw-r--r--arch/x86/include/asm/mutex_32.h110
-rw-r--r--arch/x86/include/asm/mutex_64.h127
-rw-r--r--arch/x86/include/asm/paravirt.h10
-rw-r--r--arch/x86/include/asm/paravirt_types.h18
-rw-r--r--arch/x86/include/asm/percpu.h11
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/preempt.h8
-rw-r--r--arch/x86/include/asm/processor.h99
-rw-r--r--arch/x86/include/asm/qspinlock.h6
-rw-r--r--arch/x86/include/asm/special_insns.h13
-rw-r--r--arch/x86/include/asm/stacktrace.h8
-rw-r--r--arch/x86/include/asm/topology.h32
-rw-r--r--arch/x86/include/asm/trace/exceptions.h2
-rw-r--r--arch/x86/include/asm/trace/fpu.h5
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h2
-rw-r--r--arch/x86/include/asm/tsc.h9
-rw-r--r--arch/x86/include/asm/uaccess.h13
-rw-r--r--arch/x86/include/asm/unwind.h18
-rw-r--r--arch/x86/include/asm/vgtod.h7
-rw-r--r--arch/x86/include/asm/vmx.h37
-rw-r--r--arch/x86/include/asm/x86_init.h26
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h4
-rw-r--r--arch/x86/include/uapi/asm/mce.h1
-rw-r--r--arch/x86/include/uapi/asm/prctl.h8
-rw-r--r--arch/x86/include/uapi/asm/vmx.h5
58 files changed, 766 insertions, 626 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 2cfed174e3c9..2b892e2313a9 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -6,10 +6,6 @@ generated-y += unistd_32_ia32.h
generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
-genhdr-y += unistd_32.h
-genhdr-y += unistd_64.h
-genhdr-y += unistd_x32.h
-
generic-y += clkdev.h
generic-y += cputime.h
generic-y += dma-contiguous.h
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 5391b0ae7cc3..395b69551fce 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
boot_cpu_data.x86_model <= 0x05 &&
boot_cpu_data.x86_mask < 0x0A)
return 1;
- else if (amd_e400_c1e_detected)
+ else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
return 1;
else
return max_cstate;
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 5e828da2e18f..00c88a01301d 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -21,6 +21,10 @@ extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, unsigned long);
+extern int amd_smn_read(u16 node, u32 address, u32 *value);
+extern int amd_smn_write(u16 node, u32 address, u32 value);
+extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
+
struct amd_l3_cache {
unsigned indices;
u8 subcaches[4];
@@ -55,6 +59,7 @@ struct threshold_bank {
};
struct amd_northbridge {
+ struct pci_dev *root;
struct pci_dev *misc;
struct pci_dev *link;
struct amd_l3_cache l3_cache;
@@ -66,7 +71,6 @@ struct amd_northbridge_info {
u64 flags;
struct amd_northbridge *nb;
};
-extern struct amd_northbridge_info amd_northbridges;
#define AMD_NB_GART BIT(0)
#define AMD_NB_L3_INDEX_DISABLE BIT(1)
@@ -74,20 +78,9 @@ extern struct amd_northbridge_info amd_northbridges;
#ifdef CONFIG_AMD_NB
-static inline u16 amd_nb_num(void)
-{
- return amd_northbridges.num;
-}
-
-static inline bool amd_nb_has_feature(unsigned feature)
-{
- return ((amd_northbridges.flags & feature) == feature);
-}
-
-static inline struct amd_northbridge *node_to_amd_nb(int node)
-{
- return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
-}
+u16 amd_nb_num(void);
+bool amd_nb_has_feature(unsigned int feature);
+struct amd_northbridge *node_to_amd_nb(int node);
static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
{
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f5aaf6c83222..0c5fbc68e82d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -11,7 +11,6 @@
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
-#include <asm/idle.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -196,7 +195,7 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
static inline void native_apic_msr_eoi_write(u32 reg, u32 v)
{
- wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+ wrmsr_notrace(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
}
static inline u32 native_apic_msr_read(u32 reg)
@@ -332,6 +331,7 @@ struct apic {
* on write for EOI.
*/
void (*eoi_write)(u32 reg, u32 v);
+ void (*native_eoi_write)(u32 reg, u32 v);
u64 (*icr_read)(void);
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
@@ -639,7 +639,6 @@ extern void irq_exit(void);
static inline void entering_irq(void)
{
irq_enter();
- exit_idle();
}
static inline void entering_ack_irq(void)
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..44b8762fa0c7
--- /dev/null
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -0,0 +1,16 @@
+#include <asm/ftrace.h>
+#include <asm/uaccess.h>
+#include <asm/string.h>
+#include <asm/page.h>
+#include <asm/checksum.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/special_insns.h>
+#include <asm/preempt.h>
+
+#ifndef CONFIG_X86_CMPXCHG64
+extern void cmpxchg8b_emu(void);
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d2b69fc0ceb..d59c15c3defd 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -204,6 +204,7 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
#define static_cpu_has_bug(bit) static_cpu_has((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
+#define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit))
#define MAX_CPU_FEATURES (NCAPINTS * 32)
#define cpu_have_feature boot_cpu_has
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a39629206864..eafee3161d1c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,8 +104,8 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
@@ -189,10 +189,14 @@
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
@@ -221,11 +225,13 @@
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
@@ -279,8 +285,10 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
@@ -311,4 +319,6 @@
#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 03bb1065c335..29e53ea7d764 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -5,8 +5,8 @@
#ifndef _CRYPTO_GLUE_HELPER_H
#define _CRYPTO_GLUE_HELPER_H
+#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
-#include <linux/crypto.h>
#include <asm/fpu/api.h>
#include <crypto/b128ops.h>
@@ -69,6 +69,31 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
return true;
}
+static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
+ int fpu_blocks_limit,
+ struct skcipher_walk *walk,
+ bool fpu_enabled, unsigned int nbytes)
+{
+ if (likely(fpu_blocks_limit < 0))
+ return false;
+
+ if (fpu_enabled)
+ return true;
+
+ /*
+ * Vector-registers are only used when chunk to be processed is large
+ * enough, so do not enable FPU until it is necessary.
+ */
+ if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
+ return false;
+
+ /* prevent sleeping if FPU is in use */
+ skcipher_walk_atomise(walk);
+
+ kernel_fpu_begin();
+ return true;
+}
+
static inline void glue_fpu_end(bool fpu_enabled)
{
if (fpu_enabled)
@@ -139,6 +164,18 @@ extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
common_glue_func_t tweak_fn, void *tweak_ctx,
void *crypt_ctx);
+extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes,
+ common_glue_func_t tweak_fn, void *tweak_ctx,
+ void *crypt_ctx);
+
+extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req,
+ common_glue_func_t tweak_fn, void *tweak_ctx,
+ void *crypt_ctx);
+
extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
le128 *iv, common_glue_func_t fn);
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 476b574de99e..ec23d8e1297c 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -1,13 +1,17 @@
#ifndef _ASM_X86_E820_H
#define _ASM_X86_E820_H
-#ifdef CONFIG_EFI
+/*
+ * E820_X_MAX is the maximum size of the extended E820 table. The extended
+ * table may contain up to 3 extra E820 entries per possible NUMA node, so we
+ * make room for 3 * MAX_NUMNODES possible entries, beyond the standard 128.
+ * Also note that E820_X_MAX *must* be defined before we include uapi/asm/e820.h.
+ */
#include <linux/numa.h>
#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
-#else /* ! CONFIG_EFI */
-#define E820_X_MAX E820MAX
-#endif
+
#include <uapi/asm/e820.h>
+
#ifndef __ASSEMBLY__
/* see comment in arch/x86/kernel/e820.c */
extern struct e820map *e820;
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 389d700b961e..e99675b9c861 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -210,12 +210,18 @@ static inline bool efi_is_64bit(void)
return __efi_early()->is64;
}
+#define efi_table_attr(table, attr, instance) \
+ (efi_is_64bit() ? \
+ ((table##_64_t *)(unsigned long)instance)->attr : \
+ ((table##_32_t *)(unsigned long)instance)->attr)
+
+#define efi_call_proto(protocol, f, instance, ...) \
+ __efi_early()->call(efi_table_attr(protocol, f, instance), \
+ instance, ##__VA_ARGS__)
+
#define efi_call_early(f, ...) \
- __efi_early()->call(efi_is_64bit() ? \
- ((efi_boot_services_64_t *)(unsigned long) \
- __efi_early()->boot_services)->f : \
- ((efi_boot_services_32_t *)(unsigned long) \
- __efi_early()->boot_services)->f, __VA_ARGS__)
+ __efi_early()->call(efi_table_attr(efi_boot_services, f, \
+ __efi_early()->boot_services), __VA_ARGS__)
#define __efi_call_early(f, ...) \
__efi_early()->call((unsigned long)f, __VA_ARGS__);
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index 1c7eefe32502..7ec59edde154 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -229,18 +229,18 @@ static struct fd_routine_l {
int (*_dma_setup)(char *addr, unsigned long size, int mode, int io);
} fd_routine[] = {
{
- request_dma,
- free_dma,
- get_dma_residue,
- dma_mem_alloc,
- hard_dma_setup
+ ._request_dma = request_dma,
+ ._free_dma = free_dma,
+ ._get_dma_residue = get_dma_residue,
+ ._dma_mem_alloc = dma_mem_alloc,
+ ._dma_setup = hard_dma_setup
},
{
- vdma_request_dma,
- vdma_nop,
- vdma_get_dma_residue,
- vdma_mem_alloc,
- vdma_dma_setup
+ ._request_dma = vdma_request_dma,
+ ._free_dma = vdma_nop,
+ ._get_dma_residue = vdma_get_dma_residue,
+ ._dma_mem_alloc = vdma_mem_alloc,
+ ._dma_setup = vdma_dma_setup
}
};
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..0877ae018fc9 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -27,16 +27,6 @@ extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
/*
- * Some instructions like VIA's padlock instructions generate a spurious
- * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context as well. To prevent these kernel instructions
- * in interrupt context interacting wrongly with other user/kernel fpu usage, we
- * should use them only in the context of irq_ts_save/restore()
- */
-extern int irq_ts_save(void);
-extern void irq_ts_restore(int TS_state);
-
-/*
* Query the presence of one or more xfeatures. Works on any legacy CPU as well.
*
* If 'feature_name' is set then put a human-readable description of
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2737366ea583..d4a684997497 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
/*
* FPU related CPU feature flag helper routines:
*/
-static __always_inline __pure bool use_eager_fpu(void)
-{
- return static_cpu_has(X86_FEATURE_EAGER_FPU);
-}
-
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -484,42 +479,42 @@ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size)
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
/*
- * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx,
- * on this CPU.
+ * The in-register FPU state for an FPU context on a CPU is assumed to be
+ * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * matches the FPU.
*
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
+ * If the FPU register state is valid, the kernel can skip restoring the
+ * FPU state from memory.
+ *
+ * Any code that clobbers the FPU registers or updates the in-memory
+ * FPU state for a task MUST let the rest of the kernel know that the
+ * FPU registers are no longer valid for this task.
+ *
+ * Either one of these invalidation functions is enough. Invalidate
+ * a resource you control: CPU if using the CPU for something else
+ * (with preemption disabled), FPU for the current task, or a task that
+ * is prevented from running by the current task.
*/
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+static inline void __cpu_invalidate_fpregs_state(void)
{
- per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
+ __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}
-static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
-{
- return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
-}
-
-
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
+static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
- if (!use_eager_fpu())
- clts();
+ fpu->last_cpu = -1;
}
-static inline void __fpregs_deactivate_hw(void)
+static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
- if (!use_eager_fpu())
- stts();
+ return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
-static inline void __fpregs_deactivate(struct fpu *fpu)
+/*
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own:
+ */
+static inline void fpregs_deactivate(struct fpu *fpu)
{
WARN_ON_FPU(!fpu->fpregs_active);
@@ -528,8 +523,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
trace_x86_fpu_regs_deactivated(fpu);
}
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
-static inline void __fpregs_activate(struct fpu *fpu)
+static inline void fpregs_activate(struct fpu *fpu)
{
WARN_ON_FPU(fpu->fpregs_active);
@@ -554,51 +548,19 @@ static inline int fpregs_active(void)
}
/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void fpregs_activate(struct fpu *fpu)
-{
- __fpregs_activate_hw();
- __fpregs_activate(fpu);
-}
-
-static inline void fpregs_deactivate(struct fpu *fpu)
-{
- __fpregs_deactivate(fpu);
- __fpregs_deactivate_hw();
-}
-
-/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
- * - switch_fpu_prepare() saves the old state and
- * sets the new state of the CR0.TS bit. This is
- * done within the context of the old process.
+ * - switch_fpu_prepare() saves the old state.
+ * This is done within the context of the old process.
*
* - switch_fpu_finish() restores the new state as
* necessary.
*/
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t
-switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
+static inline void
+switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- fpu_switch_t fpu;
-
- /*
- * If the task has used the math, pre-load the FPU on xsave processors
- * or if the past 5 consecutive context-switches used math.
- */
- fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->fpstate_active &&
- (use_eager_fpu() || new_fpu->counter > 5);
-
if (old_fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
@@ -608,29 +570,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
/* But leave fpu_fpregs_owner_ctx! */
old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);
-
- /* Don't change CR0.TS if we just switch! */
- if (fpu.preload) {
- new_fpu->counter++;
- __fpregs_activate(new_fpu);
- trace_x86_fpu_regs_activated(new_fpu);
- prefetch(&new_fpu->state);
- } else {
- __fpregs_deactivate_hw();
- }
- } else {
- old_fpu->counter = 0;
+ } else
old_fpu->last_cpu = -1;
- if (fpu.preload) {
- new_fpu->counter++;
- if (fpu_want_lazy_restore(new_fpu, cpu))
- fpu.preload = 0;
- else
- prefetch(&new_fpu->state);
- fpregs_activate(new_fpu);
- }
- }
- return fpu;
}
/*
@@ -638,15 +579,19 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
*/
/*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
+ * Set up the userspace FPU context for the new task, if the task
+ * has used the FPU.
*/
-static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
+static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
- if (fpu_switch.preload)
- copy_kernel_to_fpregs(&new_fpu->state);
+ bool preload = static_cpu_has(X86_FEATURE_FPU) &&
+ new_fpu->fpstate_active;
+
+ if (preload) {
+ if (!fpregs_state_valid(new_fpu, cpu))
+ copy_kernel_to_fpregs(&new_fpu->state);
+ fpregs_activate(new_fpu);
+ }
}
/*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 48df486b02f9..3c80f5b9c09d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -322,17 +322,6 @@ struct fpu {
unsigned char fpregs_active;
/*
- * @counter:
- *
- * This counter contains the number of consecutive context switches
- * during which the FPU stays used. If this is over a threshold, the
- * lazy FPU restore logic becomes eager, to save the trap overhead.
- * This is an unsigned char so that after 256 iterations the counter
- * wraps and the context switch behavior turns lazy again; this is to
- * deal with bursty apps that only use the FPU for a short time:
- */
- unsigned char counter;
- /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
@@ -340,29 +329,6 @@ struct fpu {
* the registers in the FPU are more recent than this state
* copy. If the task context-switches away then they get
* saved here and represent the FPU state.
- *
- * After context switches there may be a (short) time period
- * during which the in-FPU hardware registers are unchanged
- * and still perfectly match this state, if the tasks
- * scheduled afterwards are not using the FPU.
- *
- * This is the 'lazy restore' window of optimization, which
- * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
- *
- * We detect whether a subsequent task uses the FPU via setting
- * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
- *
- * During this window, if the task gets scheduled again, we
- * might be able to skip having to do a restore from this
- * memory buffer to the hardware registers - at the cost of
- * incurring the overhead of #NM fault traps.
- *
- * Note that on modern CPUs that support the XSAVEOPT (or other
- * optimized XSAVE instructions), we don't use #NM traps anymore,
- * as the hardware can track whether FPU registers need saving
- * or not. On such CPUs we activate the non-lazy ('eagerfpu')
- * logic, which unconditionally saves/restores all FPU state
- * across context switches. (if FPU state exists.)
*/
union fpregs_state state;
/*
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 430bacf73074..1b2799e0699a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,21 +21,16 @@
/* Supervisor features */
#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)
-/* Supported features which support lazy state saving */
-#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
+/* All currently supported features */
+#define XCNTXT_MASK (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
- XFEATURE_MASK_Hi16_ZMM)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR | \
- XFEATURE_MASK_PKRU)
-
-/* All currently supported features */
-#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
deleted file mode 100644
index c5d1785373ed..000000000000
--- a/arch/x86/include/asm/idle.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _ASM_X86_IDLE_H
-#define _ASM_X86_IDLE_H
-
-#define IDLE_START 1
-#define IDLE_END 2
-
-struct notifier_block;
-void idle_notifier_register(struct notifier_block *n);
-void idle_notifier_unregister(struct notifier_block *n);
-
-#ifdef CONFIG_X86_64
-void enter_idle(void);
-void exit_idle(void);
-#else /* !CONFIG_X86_64 */
-static inline void enter_idle(void) { }
-static inline void exit_idle(void) { }
-static inline void __exit_idle(void) { }
-#endif /* CONFIG_X86_64 */
-
-void amd_e400_remove_cpu(int cpu);
-
-#endif /* _ASM_X86_IDLE_H */
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
new file mode 100644
index 000000000000..95ce5c85b009
--- /dev/null
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -0,0 +1,224 @@
+#ifndef _ASM_X86_INTEL_RDT_H
+#define _ASM_X86_INTEL_RDT_H
+
+#ifdef CONFIG_INTEL_RDT_A
+
+#include <linux/kernfs.h>
+#include <linux/jump_label.h>
+
+#include <asm/intel_rdt_common.h>
+
+#define IA32_L3_QOS_CFG 0xc81
+#define IA32_L3_CBM_BASE 0xc90
+#define IA32_L2_CBM_BASE 0xd10
+
+#define L3_QOS_CDP_ENABLE 0x01ULL
+
+/**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn: kernfs node
+ * @rdtgroup_list: linked list for all rdtgroups
+ * @closid: closid for this rdtgroup
+ * @cpu_mask: CPUs assigned to this rdtgroup
+ * @flags: status bits
+ * @waitcount: how many cpus expect to find this
+ * group when they acquire rdtgroup_mutex
+ */
+struct rdtgroup {
+ struct kernfs_node *kn;
+ struct list_head rdtgroup_list;
+ int closid;
+ struct cpumask cpu_mask;
+ int flags;
+ atomic_t waitcount;
+};
+
+/* rdtgroup.flags */
+#define RDT_DELETED 1
+
+/* List of all resource groups */
+extern struct list_head rdt_all_groups;
+
+int __init rdtgroup_init(void);
+
+/**
+ * struct rftype - describe each file in the resctrl file system
+ * @name: file name
+ * @mode: access mode
+ * @kf_ops: operations
+ * @seq_show: show content of the file
+ * @write: write to the file
+ */
+struct rftype {
+ char *name;
+ umode_t mode;
+ struct kernfs_ops *kf_ops;
+
+ int (*seq_show)(struct kernfs_open_file *of,
+ struct seq_file *sf, void *v);
+ /*
+ * write() is the generic write callback which maps directly to
+ * kernfs write operation and overrides all other operations.
+ * Maximum write size is determined by ->max_write_len.
+ */
+ ssize_t (*write)(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+};
+
+/**
+ * struct rdt_resource - attributes of an RDT resource
+ * @enabled: Is this feature enabled on this machine
+ * @capable: Is this feature available on this machine
+ * @name: Name to use in "schemata" file
+ * @num_closid: Number of CLOSIDs available
+ * @max_cbm: Largest Cache Bit Mask allowed
+ * @min_cbm_bits: Minimum number of consecutive bits to be set
+ * in a cache bit mask
+ * @domains: All domains for this resource
+ * @num_domains: Number of domains active
+ * @msr_base: Base MSR address for CBMs
+ * @tmp_cbms: Scratch space when updating schemata
+ * @num_tmp_cbms: Number of CBMs in tmp_cbms
+ * @cache_level: Which cache level defines scope of this domain
+ * @cbm_idx_multi: Multiplier of CBM index
+ * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
+ * closid * cbm_idx_multi + cbm_idx_offset
+ */
+struct rdt_resource {
+ bool enabled;
+ bool capable;
+ char *name;
+ int num_closid;
+ int cbm_len;
+ int min_cbm_bits;
+ u32 max_cbm;
+ struct list_head domains;
+ int num_domains;
+ int msr_base;
+ u32 *tmp_cbms;
+ int num_tmp_cbms;
+ int cache_level;
+ int cbm_idx_multi;
+ int cbm_idx_offset;
+};
+
+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list: all instances of this resource
+ * @id: unique id for this instance
+ * @cpu_mask: which cpus share this resource
+ * @cbm: array of cache bit masks (indexed by CLOSID)
+ */
+struct rdt_domain {
+ struct list_head list;
+ int id;
+ struct cpumask cpu_mask;
+ u32 *cbm;
+};
+
+/**
+ * struct msr_param - set a range of MSRs from a domain
+ * @res: The resource to use
+ * @low: Beginning index from base MSR
+ * @high: End index
+ */
+struct msr_param {
+ struct rdt_resource *res;
+ int low;
+ int high;
+};
+
+extern struct mutex rdtgroup_mutex;
+
+extern struct rdt_resource rdt_resources_all[];
+extern struct rdtgroup rdtgroup_default;
+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+
+int __init rdtgroup_init(void);
+
+enum {
+ RDT_RESOURCE_L3,
+ RDT_RESOURCE_L3DATA,
+ RDT_RESOURCE_L3CODE,
+ RDT_RESOURCE_L2,
+
+ /* Must be the last */
+ RDT_NUM_RESOURCES,
+};
+
+#define for_each_capable_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++) \
+ if (r->capable)
+
+#define for_each_enabled_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++) \
+ if (r->enabled)
+
+/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
+union cpuid_0x10_1_eax {
+ struct {
+ unsigned int cbm_len:5;
+ } split;
+ unsigned int full;
+};
+
+/* CPUID.(EAX=10H, ECX=ResID=1).EDX */
+union cpuid_0x10_1_edx {
+ struct {
+ unsigned int cos_max:16;
+ } split;
+ unsigned int full;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
+
+void rdt_cbm_update(void *arg);
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
+void rdtgroup_kn_unlock(struct kernfs_node *kn);
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v);
+
+/*
+ * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - This will stay as no-op unless we are running on an Intel SKU
+ * which supports resource control and we enable by mounting the
+ * resctrl file system.
+ * - Caches the per cpu CLOSid values and does the MSR write only
+ * when a task with a different CLOSid is scheduled in.
+ *
+ * Must be called with preemption disabled.
+ */
+static inline void intel_rdt_sched_in(void)
+{
+ if (static_branch_likely(&rdt_enable_key)) {
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ int closid;
+
+ /*
+ * If this task has a closid assigned, use it.
+ * Else use the closid assigned to this cpu.
+ */
+ closid = current->closid;
+ if (closid == 0)
+ closid = this_cpu_read(cpu_closid);
+
+ if (closid != state->closid) {
+ state->closid = closid;
+ wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
+ }
+ }
+}
+
+#else
+
+static inline void intel_rdt_sched_in(void) {}
+
+#endif /* CONFIG_INTEL_RDT_A */
+#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h
new file mode 100644
index 000000000000..b31081b89407
--- /dev/null
+++ b/arch/x86/include/asm/intel_rdt_common.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_X86_INTEL_RDT_COMMON_H
+#define _ASM_X86_INTEL_RDT_COMMON_H
+
+#define MSR_IA32_PQR_ASSOC 0x0c8f
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid: The cached Resource Monitoring ID
+ * @closid: The cached Class Of Service ID
+ * @rmid_usecnt: The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+ u32 rmid;
+ u32 closid;
+ int rmid_usecnt;
+};
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+#endif /* _ASM_X86_INTEL_RDT_COMMON_H */
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index d31881188431..29a594a3b82a 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -21,7 +21,6 @@ enum die_val {
DIE_NMIUNKNOWN,
};
-extern void printk_address(unsigned long address);
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bdde80731f49..2e25038dbd93 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -191,6 +191,8 @@ enum {
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
#define PFERR_PK_BIT 5
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
@@ -198,6 +200,13 @@ enum {
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
+#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
+
+#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
+ PFERR_USER_MASK | \
+ PFERR_WRITE_MASK | \
+ PFERR_PRESENT_MASK)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
@@ -695,6 +704,7 @@ struct kvm_apic_map {
/* Hyper-V emulation context */
struct kvm_hv {
+ struct mutex hv_lock;
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
@@ -1062,6 +1072,7 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
+bool pdptrs_changed(struct kvm_vcpu *vcpu);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
@@ -1124,7 +1135,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
@@ -1203,7 +1215,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
@@ -1358,7 +1370,8 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
-void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
+int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
int kvm_is_in_guest(void);
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index c2b8d24a235c..d74747b031ec 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -29,9 +29,20 @@ struct kvm_page_track_notifier_node {
* @gpa: the physical address written by guest.
* @new: the data was written to the address.
* @bytes: the written length.
+ * @node: this node
*/
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
+ int bytes, struct kvm_page_track_notifier_node *node);
+ /*
+ * It is called when memory slot is being moved or removed
+ * users can drop write-protection for the pages in that memory slot
+ *
+ * @kvm: the kvm where memory slot being moved or removed
+ * @slot: the memory slot being moved or removed
+ * @node: this node
+ */
+ void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_page_track_notifier_node *node);
};
void kvm_page_track_init(struct kvm *kvm);
@@ -58,4 +69,5 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
struct kvm_page_track_notifier_node *n);
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
+void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#endif
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ef01fef3eebc..6c119cfae218 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -9,7 +9,6 @@
#define LHCALL_FLUSH_TLB 5
#define LHCALL_LOAD_IDT_ENTRY 6
#define LHCALL_SET_STACK 7
-#define LHCALL_TS 8
#define LHCALL_SET_CLOCKEVENT 9
#define LHCALL_HALT 10
#define LHCALL_SET_PMD 13
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9bd7ff5ffbcc..5132f2a6c0a2 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -252,8 +252,10 @@ static inline void cmci_recheck(void) {}
#ifdef CONFIG_X86_MCE_AMD
void mce_amd_feature_init(struct cpuinfo_x86 *c);
+int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
#else
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
+static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
#endif
int mce_available(struct cpuinfo_x86 *c);
@@ -293,9 +295,7 @@ void do_machine_check(struct pt_regs *, long);
/*
* Threshold handler
*/
-
extern void (*mce_threshold_vector)(void);
-extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
/* Deferred error interrupt handler */
extern void (*deferred_error_int_vector)(void);
@@ -356,27 +356,31 @@ enum smca_bank_types {
N_SMCA_BANK_TYPES
};
-struct smca_bank_name {
- const char *name; /* Short name for sysfs */
- const char *long_name; /* Long name for pretty-printing */
-};
-
-extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
+#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
-#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
-
-struct smca_hwid_mcatype {
+struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
};
-struct smca_bank_info {
- struct smca_hwid_mcatype *type;
- u32 type_instance;
+struct smca_bank {
+ struct smca_hwid *hwid;
+ /* Instance ID */
+ u32 id;
};
-extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
+extern struct smca_bank smca_banks[MAX_NR_BANKS];
+
+extern const char *smca_get_long_name(enum smca_bank_types t);
+
+extern int mce_threshold_create_device(unsigned int cpu);
+extern int mce_threshold_remove_device(unsigned int cpu);
+
+#else
+
+static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
+static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
#endif
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index da0d81fa0b54..38711df3bcb5 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -20,6 +20,15 @@ do { \
(u32)((u64)(val)), \
(u32)((u64)(val) >> 32))
+struct ucode_patch {
+ struct list_head plist;
+ void *data; /* Intel uses only this one */
+ u32 patch_id;
+ u16 equiv_cpu;
+};
+
+extern struct list_head microcode_cache;
+
struct cpu_signature {
unsigned int sig;
unsigned int pf;
@@ -55,12 +64,7 @@ struct ucode_cpu_info {
void *mc;
};
extern struct ucode_cpu_info ucode_cpu_info[];
-
-#ifdef CONFIG_MICROCODE
-int __init microcode_init(void);
-#else
-static inline int __init microcode_init(void) { return 0; };
-#endif
+struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa);
#ifdef CONFIG_MICROCODE_INTEL
extern struct microcode_ops * __init init_intel_microcode(void);
@@ -131,11 +135,13 @@ static inline unsigned int x86_cpuid_family(void)
}
#ifdef CONFIG_MICROCODE
+int __init microcode_init(void);
extern void __init load_ucode_bsp(void);
extern void load_ucode_ap(void);
void reload_early_microcode(void);
extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
#else
+static inline int __init microcode_init(void) { return 0; };
static inline void __init load_ucode_bsp(void) { }
static inline void load_ucode_ap(void) { }
static inline void reload_early_microcode(void) { }
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 15eb75484cc0..3e3e20be829a 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -40,38 +40,18 @@ struct microcode_amd {
unsigned int mpb[0];
};
-static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
- unsigned int sig)
-{
- int i = 0;
-
- if (!equiv_cpu_table)
- return 0;
-
- while (equiv_cpu_table[i].installed_cpu != 0) {
- if (sig == equiv_cpu_table[i].installed_cpu)
- return equiv_cpu_table[i].equiv_cpu;
-
- i++;
- }
- return 0;
-}
-
-extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
-extern int apply_microcode_amd(int cpu);
-extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
-
#define PATCH_MAX_SIZE PAGE_SIZE
#ifdef CONFIG_MICROCODE_AMD
extern void __init load_ucode_amd_bsp(unsigned int family);
-extern void load_ucode_amd_ap(void);
-extern int __init save_microcode_in_initrd_amd(void);
+extern void load_ucode_amd_ap(unsigned int family);
+extern int __init save_microcode_in_initrd_amd(unsigned int family);
void reload_ucode_amd(void);
#else
static inline void __init load_ucode_amd_bsp(unsigned int family) {}
-static inline void load_ucode_amd_ap(void) {}
-static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; }
+static inline void load_ucode_amd_ap(unsigned int family) {}
+static inline int __init
+save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
void reload_ucode_amd(void) {}
#endif
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 5e69154c9f07..195becc6f780 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -52,10 +52,6 @@ struct extended_sigtable {
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev);
-extern int microcode_sanity_check(void *mc, int print_err);
-extern int find_matching_signature(void *mc, unsigned int csig, int cpf);
-
#ifdef CONFIG_MICROCODE_INTEL
extern void __init load_ucode_intel_bsp(void);
extern void load_ucode_intel_ap(void);
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 72198c64e646..f9813b6d8b80 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -31,6 +31,10 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
+#ifdef CONFIG_X86_INTEL_MPX
+ /* address of the bounds directory */
+ void __user *bd_addr;
+#endif
} mm_context_t;
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8e0a9fe86de4..306c7e12af55 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -47,7 +47,7 @@ struct ldt_struct {
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
- int size;
+ unsigned int size;
};
/*
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index 7a35495275a9..0b416d4cf73b 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -59,7 +59,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs);
int mpx_handle_bd_fault(void);
static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
{
- return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
+ return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
}
static inline void mpx_mm_init(struct mm_struct *mm)
{
@@ -67,7 +67,7 @@ static inline void mpx_mm_init(struct mm_struct *mm)
* NULL is theoretically a valid place to put the bounds
* directory, so point this at an invalid address.
*/
- mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
+ mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
}
void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 78f3760ca1f2..710273c617b8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -37,6 +37,10 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL 0x0000004e
+#define MSR_PPIN 0x0000004f
+
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index b5fee97813cd..db0b90c3b03e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -70,14 +70,14 @@ extern struct tracepoint __tracepoint_read_msr;
extern struct tracepoint __tracepoint_write_msr;
extern struct tracepoint __tracepoint_rdpmc;
#define msr_tracepoint_active(t) static_key_false(&(t).key)
-extern void do_trace_write_msr(unsigned msr, u64 val, int failed);
-extern void do_trace_read_msr(unsigned msr, u64 val, int failed);
-extern void do_trace_rdpmc(unsigned msr, u64 val, int failed);
+extern void do_trace_write_msr(unsigned int msr, u64 val, int failed);
+extern void do_trace_read_msr(unsigned int msr, u64 val, int failed);
+extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed);
#else
#define msr_tracepoint_active(t) false
-static inline void do_trace_write_msr(unsigned msr, u64 val, int failed) {}
-static inline void do_trace_read_msr(unsigned msr, u64 val, int failed) {}
-static inline void do_trace_rdpmc(unsigned msr, u64 val, int failed) {}
+static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {}
+static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {}
+static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
#endif
static inline unsigned long long native_read_msr(unsigned int msr)
@@ -115,22 +115,36 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
}
/* Can be uninlined because referenced by paravirt */
-notrace static inline void native_write_msr(unsigned int msr,
- unsigned low, unsigned high)
+static inline void notrace
+__native_write_msr_notrace(unsigned int msr, u32 low, u32 high)
{
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
: : "c" (msr), "a"(low), "d" (high) : "memory");
+}
+
+/* Can be uninlined because referenced by paravirt */
+static inline void notrace
+native_write_msr(unsigned int msr, u32 low, u32 high)
+{
+ __native_write_msr_notrace(msr, low, high);
if (msr_tracepoint_active(__tracepoint_write_msr))
do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
+static inline void
+wrmsr_notrace(unsigned int msr, u32 low, u32 high)
+{
+ __native_write_msr_notrace(msr, low, high);
+}
+
/* Can be uninlined because referenced by paravirt */
-notrace static inline int native_write_msr_safe(unsigned int msr,
- unsigned low, unsigned high)
+static inline int notrace
+native_write_msr_safe(unsigned int msr, u32 low, u32 high)
{
int err;
+
asm volatile("2: wrmsr ; xor %[err],%[err]\n"
"1:\n\t"
".section .fixup,\"ax\"\n\t"
@@ -223,7 +237,7 @@ do { \
(void)((high) = (u32)(__val >> 32)); \
} while (0)
-static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
+static inline void wrmsr(unsigned int msr, u32 low, u32 high)
{
native_write_msr(msr, low, high);
}
@@ -231,13 +245,13 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
#define rdmsrl(msr, val) \
((val) = native_read_msr((msr)))
-static inline void wrmsrl(unsigned msr, u64 val)
+static inline void wrmsrl(unsigned int msr, u64 val)
{
native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
}
/* wrmsr with exception handling */
-static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
+static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high)
{
return native_write_msr_safe(msr, low, high);
}
@@ -252,7 +266,7 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
__err; \
})
-static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
+static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p)
{
int err;
@@ -325,12 +339,12 @@ static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
struct msr *msrs)
{
- rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+ rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
}
static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
struct msr *msrs)
{
- wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+ wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
}
static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
u32 *l, u32 *h)
diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h
deleted file mode 100644
index 7d3a48275394..000000000000
--- a/arch/x86/include/asm/mutex.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef CONFIG_X86_32
-# include <asm/mutex_32.h>
-#else
-# include <asm/mutex_64.h>
-#endif
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
deleted file mode 100644
index e9355a84fc67..000000000000
--- a/arch/x86/include/asm/mutex_32.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Assembly implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- *
- * started by Ingo Molnar:
- *
- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- */
-#ifndef _ASM_X86_MUTEX_32_H
-#define _ASM_X86_MUTEX_32_H
-
-#include <asm/alternative.h>
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fn> if it
- * wasn't 1 originally. This function MUST leave the value lower than 1
- * even when the "1" assertion wasn't true.
- */
-#define __mutex_fastpath_lock(count, fail_fn) \
-do { \
- unsigned int dummy; \
- \
- typecheck(atomic_t *, count); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " decl (%%eax)\n" \
- " jns 1f \n" \
- " call " #fail_fn "\n" \
- "1:\n" \
- : "=a" (dummy) \
- : "a" (count) \
- : "memory", "ecx", "edx"); \
-} while (0)
-
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(atomic_dec_return(count) < 0))
- return -1;
- else
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * try to promote the mutex from 0 to 1. if it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value
- * to 1, or to set it to a value lower than 1.
- *
- * If the implementation sets it to a value of lower than 1, the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-#define __mutex_fastpath_unlock(count, fail_fn) \
-do { \
- unsigned int dummy; \
- \
- typecheck(atomic_t *, count); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " incl (%%eax)\n" \
- " jg 1f\n" \
- " call " #fail_fn "\n" \
- "1:\n" \
- : "=a" (dummy) \
- : "a" (count) \
- : "memory", "ecx", "edx"); \
-} while (0)
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- */
-static inline int __mutex_fastpath_trylock(atomic_t *count,
- int (*fail_fn)(atomic_t *))
-{
- /* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
- return 1;
-
- return 0;
-}
-
-#endif /* _ASM_X86_MUTEX_32_H */
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
deleted file mode 100644
index d9850758464e..000000000000
--- a/arch/x86/include/asm/mutex_64.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Assembly implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- *
- * started by Ingo Molnar:
- *
- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- */
-#ifndef _ASM_X86_MUTEX_64_H
-#define _ASM_X86_MUTEX_64_H
-
-/**
- * __mutex_fastpath_lock - decrement and call function if negative
- * @v: pointer of type atomic_t
- * @fail_fn: function to call if the result is negative
- *
- * Atomically decrements @v and calls <fail_fn> if the result is negative.
- */
-#ifdef CC_HAVE_ASM_GOTO
-static inline void __mutex_fastpath_lock(atomic_t *v,
- void (*fail_fn)(atomic_t *))
-{
- asm_volatile_goto(LOCK_PREFIX " decl %0\n"
- " jns %l[exit]\n"
- : : "m" (v->counter)
- : "memory", "cc"
- : exit);
- fail_fn(v);
-exit:
- return;
-}
-#else
-#define __mutex_fastpath_lock(v, fail_fn) \
-do { \
- unsigned long dummy; \
- \
- typecheck(atomic_t *, v); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " decl (%%rdi)\n" \
- " jns 1f \n" \
- " call " #fail_fn "\n" \
- "1:" \
- : "=D" (dummy) \
- : "D" (v) \
- : "rax", "rsi", "rdx", "rcx", \
- "r8", "r9", "r10", "r11", "memory"); \
-} while (0)
-#endif
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(atomic_dec_return(count) < 0))
- return -1;
- else
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - increment and call function if nonpositive
- * @v: pointer of type atomic_t
- * @fail_fn: function to call if the result is nonpositive
- *
- * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
- */
-#ifdef CC_HAVE_ASM_GOTO
-static inline void __mutex_fastpath_unlock(atomic_t *v,
- void (*fail_fn)(atomic_t *))
-{
- asm_volatile_goto(LOCK_PREFIX " incl %0\n"
- " jg %l[exit]\n"
- : : "m" (v->counter)
- : "memory", "cc"
- : exit);
- fail_fn(v);
-exit:
- return;
-}
-#else
-#define __mutex_fastpath_unlock(v, fail_fn) \
-do { \
- unsigned long dummy; \
- \
- typecheck(atomic_t *, v); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " incl (%%rdi)\n" \
- " jg 1f\n" \
- " call " #fail_fn "\n" \
- "1:" \
- : "=D" (dummy) \
- : "D" (v) \
- : "rax", "rsi", "rdx", "rcx", \
- "r8", "r9", "r10", "r11", "memory"); \
-} while (0)
-#endif
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to 0 and return 1 (success), or return 0 (failure)
- * if it wasn't 1 originally. [the fallback function is never used on
- * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.]
- */
-static inline int __mutex_fastpath_trylock(atomic_t *count,
- int (*fail_fn)(atomic_t *))
-{
- if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
- return 1;
-
- return 0;
-}
-
-#endif /* _ASM_X86_MUTEX_64_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ce932812f142..1eea6ca40694 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -41,11 +41,6 @@ static inline void set_debugreg(unsigned long val, int reg)
PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}
-static inline void clts(void)
-{
- PVOP_VCALL0(pv_cpu_ops.clts);
-}
-
static inline unsigned long read_cr0(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
@@ -678,6 +673,11 @@ static __always_inline void pv_kick(int cpu)
PVOP_VCALL1(pv_lock_ops.kick, cpu);
}
+static __always_inline bool pv_vcpu_is_preempted(int cpu)
+{
+ return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu);
+}
+
#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0f400c0e4979..bb2de45a60f2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -103,8 +103,6 @@ struct pv_cpu_ops {
unsigned long (*get_debugreg)(int regno);
void (*set_debugreg)(int regno, unsigned long value);
- void (*clts)(void);
-
unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long);
@@ -310,6 +308,8 @@ struct pv_lock_ops {
void (*wait)(u8 *ptr, u8 val);
void (*kick)(int cpu);
+
+ struct paravirt_callee_save vcpu_is_preempted;
};
/* This contains all the paravirt structures: we get a convenient
@@ -508,6 +508,18 @@ int paravirt_disable_iospace(void);
#define PVOP_TEST_NULL(op) ((void)op)
#endif
+#define PVOP_RETMASK(rettype) \
+ ({ unsigned long __mask = ~0UL; \
+ switch (sizeof(rettype)) { \
+ case 1: __mask = 0xffUL; break; \
+ case 2: __mask = 0xffffUL; break; \
+ case 4: __mask = 0xffffffffUL; break; \
+ default: break; \
+ } \
+ __mask; \
+ })
+
+
#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
pre, post, ...) \
({ \
@@ -535,7 +547,7 @@ int paravirt_disable_iospace(void);
paravirt_clobber(clbr), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
- __ret = (rettype)__eax; \
+ __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \
} \
__ret; \
})
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 84f58de08c2b..9fa03604b2b3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -507,17 +507,6 @@ do { \
#endif
-/* This is not atomic against other CPUs -- CPU preemption needs to be off */
-#define x86_test_and_clear_bit_percpu(bit, var) \
-({ \
- bool old__; \
- asm volatile("btr %2,"__percpu_arg(1)"\n\t" \
- CC_SET(c) \
- : CC_OUT(c) (old__), "+m" (var) \
- : "dIr" (bit)); \
- old__; \
-})
-
static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
const unsigned long __percpu *addr)
{
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 1cc82ece9ac1..62b775926045 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -116,8 +116,7 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
-extern void sync_global_pgds(unsigned long start, unsigned long end,
- int removed);
+extern void sync_global_pgds(unsigned long start, unsigned long end);
/*
* Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 17f218645701..ec1f3c651150 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -24,7 +24,13 @@ static __always_inline int preempt_count(void)
static __always_inline void preempt_count_set(int pc)
{
- raw_cpu_write_4(__preempt_count, pc);
+ int old, new;
+
+ do {
+ old = raw_cpu_read_4(__preempt_count);
+ new = (old & PREEMPT_NEED_RESCHED) |
+ (pc & ~PREEMPT_NEED_RESCHED);
+ } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
}
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 984a7bf17f6a..eaf100508c36 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -137,6 +137,17 @@ struct cpuinfo_x86 {
u32 microcode;
};
+struct cpuid_regs {
+ u32 eax, ebx, ecx, edx;
+};
+
+enum cpuid_regs_idx {
+ CPUID_EAX = 0,
+ CPUID_EBX,
+ CPUID_ECX,
+ CPUID_EDX,
+};
+
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
#define X86_VENDOR_AMD 2
@@ -178,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+ unsigned int sub_leaf,
+ enum cpuid_regs_idx reg);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
@@ -588,43 +602,76 @@ static __always_inline void cpu_relax(void)
rep_nop();
}
-#define cpu_relax_lowlatency() cpu_relax()
-
-/* Stop speculative execution and prefetching of modified code. */
+/*
+ * This function forces the icache and prefetched instruction stream to
+ * catch up with reality in two very specific cases:
+ *
+ * a) Text was modified using one virtual address and is about to be executed
+ * from the same physical page at a different virtual address.
+ *
+ * b) Text was modified on a different CPU, may subsequently be
+ * executed on this CPU, and you want to make sure the new version
+ * gets executed. This generally means you're calling this in a IPI.
+ *
+ * If you're calling this for a different reason, you're probably doing
+ * it wrong.
+ */
static inline void sync_core(void)
{
- int tmp;
-
-#ifdef CONFIG_M486
/*
- * Do a CPUID if available, otherwise do a jump. The jump
- * can conveniently enough be the jump around CPUID.
+ * There are quite a few ways to do this. IRET-to-self is nice
+ * because it works on every CPU, at any CPL (so it's compatible
+ * with paravirtualization), and it never exits to a hypervisor.
+ * The only down sides are that it's a bit slow (it seems to be
+ * a bit more than 2x slower than the fastest options) and that
+ * it unmasks NMIs. The "push %cs" is needed because, in
+ * paravirtual environments, __KERNEL_CS may not be a valid CS
+ * value when we do IRET directly.
+ *
+ * In case NMI unmasking or performance ever becomes a problem,
+ * the next best option appears to be MOV-to-CR2 and an
+ * unconditional jump. That sequence also works on all CPUs,
+ * but it will fault at CPL3 (i.e. Xen PV and lguest).
+ *
+ * CPUID is the conventional way, but it's nasty: it doesn't
+ * exist on some 486-like CPUs, and it usually exits to a
+ * hypervisor.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
*/
- asm volatile("cmpl %2,%1\n\t"
- "jl 1f\n\t"
- "cpuid\n"
- "1:"
- : "=a" (tmp)
- : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
- : "ebx", "ecx", "edx", "memory");
+ register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_32
+ asm volatile (
+ "pushfl\n\t"
+ "pushl %%cs\n\t"
+ "pushl $1f\n\t"
+ "iret\n\t"
+ "1:"
+ : "+r" (__sp) : : "memory");
#else
- /*
- * CPUID is a barrier to speculative execution.
- * Prefetched instructions are automatically
- * invalidated when modified.
- */
- asm volatile("cpuid"
- : "=a" (tmp)
- : "0" (1)
- : "ebx", "ecx", "edx", "memory");
+ unsigned int tmp;
+
+ asm volatile (
+ "mov %%ss, %0\n\t"
+ "pushq %q0\n\t"
+ "pushq %%rsp\n\t"
+ "addq $8, (%%rsp)\n\t"
+ "pushfq\n\t"
+ "mov %%cs, %0\n\t"
+ "pushq %q0\n\t"
+ "pushq $1f\n\t"
+ "iretq\n\t"
+ "1:"
+ : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
#endif
}
extern void select_idle_routine(const struct cpuinfo_x86 *c);
-extern void init_amd_e400_c1e_mask(void);
+extern void amd_e400_c1e_apic_setup(void);
extern unsigned long boot_option_idle_override;
-extern bool amd_e400_c1e_detected;
enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
IDLE_POLL};
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index eaba08076030..c343ab52579f 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -32,6 +32,12 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
{
pv_queued_spin_unlock(lock);
}
+
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+ return pv_vcpu_is_preempted(cpu);
+}
#else
static inline void queued_spin_unlock(struct qspinlock *lock)
{
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 19a2224f9e16..12af3e35edfa 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,11 +6,6 @@
#include <asm/nops.h>
-static inline void native_clts(void)
-{
- asm volatile("clts");
-}
-
/*
* Volatile isn't enough to prevent the compiler from reordering the
* read/write functions for the control registers and messing everything up.
@@ -208,16 +203,8 @@ static inline void load_gs_index(unsigned selector)
#endif
-/* Clear the 'TS' bit */
-static inline void clts(void)
-{
- native_clts();
-}
-
#endif/* CONFIG_PARAVIRT */
-#define stts() write_cr0(read_cr0() | X86_CR0_TS)
-
static inline void clflush(volatile void *__p)
{
asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 37f2e0b377ad..a3269c897ec5 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask);
-void stack_type_str(enum stack_type type, const char **begin,
- const char **end);
+const char *stack_type_name(enum stack_type type);
static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
{
@@ -43,8 +42,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
addr + len > begin && addr + len <= end);
}
-extern int kstack_depth_to_print;
-
#ifdef CONFIG_X86_32
#define STACKSLOTS_PER_LINE 8
#else
@@ -86,9 +83,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl);
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, char *log_lvl);
-
extern unsigned int code_bytes;
/* The form of the top of the frame on the stack */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index cf75871d2f81..6358a85e2270 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -146,4 +146,36 @@ struct pci_bus;
int x86_pci_root_bus_node(int bus);
void x86_pci_root_bus_resources(int bus, struct list_head *resources);
+extern bool x86_topology_update;
+
+#ifdef CONFIG_SCHED_MC_PRIO
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
+
+/* Interface to set priority of a cpu */
+void sched_set_itmt_core_prio(int prio, int core_cpu);
+
+/* Interface to notify scheduler that system supports ITMT */
+int sched_set_itmt_support(void);
+
+/* Interface to notify scheduler that system revokes ITMT support */
+void sched_clear_itmt_support(void);
+
+#else /* CONFIG_SCHED_MC_PRIO */
+
+#define sysctl_sched_itmt_enabled 0
+static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
+{
+}
+static inline int sched_set_itmt_support(void)
+{
+ return 0;
+}
+static inline void sched_clear_itmt_support(void)
+{
+}
+#endif /* CONFIG_SCHED_MC_PRIO */
+
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 2fbc66c7885b..2422b14c50a7 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -6,7 +6,7 @@
#include <linux/tracepoint.h>
-extern void trace_irq_vector_regfunc(void);
+extern int trace_irq_vector_regfunc(void);
extern void trace_irq_vector_unregfunc(void);
DECLARE_EVENT_CLASS(x86_exceptions,
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 9217ab1f5bf6..342e59789fcd 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -14,7 +14,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
__field(struct fpu *, fpu)
__field(bool, fpregs_active)
__field(bool, fpstate_active)
- __field(int, counter)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
@@ -23,17 +22,15 @@ DECLARE_EVENT_CLASS(x86_fpu,
__entry->fpu = fpu;
__entry->fpregs_active = fpu->fpregs_active;
__entry->fpstate_active = fpu->fpstate_active;
- __entry->counter = fpu->counter;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
__entry->fpregs_active,
__entry->fpstate_active,
- __entry->counter,
__entry->xfeatures,
__entry->xcomp_bv
)
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 38a09a13a9bc..32dd6a9e343c 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -6,7 +6,7 @@
#include <linux/tracepoint.h>
-extern void trace_irq_vector_regfunc(void);
+extern int trace_irq_vector_regfunc(void);
extern void trace_irq_vector_unregfunc(void);
DECLARE_EVENT_CLASS(x86_irq_vector,
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 33b6365c22fe..abb1fdcc545a 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -45,8 +45,17 @@ extern int tsc_clocksource_reliable;
* Boot-time check whether the TSCs are synchronized across
* all CPUs/cores:
*/
+#ifdef CONFIG_X86_TSC
+extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
+extern void tsc_verify_tsc_adjust(bool resume);
extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
+#else
+static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
+static inline void tsc_verify_tsc_adjust(bool resume) { }
+static inline void check_tsc_sync_source(int cpu) { }
+static inline void check_tsc_sync_target(void) { }
+#endif
extern int notsc_setup(char *);
extern void tsc_save_sched_clock_state(void);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index faf3687f1035..ea148313570f 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -68,6 +68,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
})
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task())
+#else
+# define WARN_ON_IN_IRQ()
+#endif
+
/**
* access_ok: - Checks if a user space pointer is valid
* @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
@@ -88,8 +94,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
-#define access_ok(type, addr, size) \
- likely(!__range_not_ok(addr, size, user_addr_max()))
+#define access_ok(type, addr, size) \
+({ \
+ WARN_ON_IN_IRQ(); \
+ likely(!__range_not_ok(addr, size, user_addr_max())); \
+})
/*
* These are the main single-value transfer routines. They automatically
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 46de9ac4b990..6fa75b17aec3 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,7 +12,8 @@ struct unwind_state {
struct task_struct *task;
int graph_idx;
#ifdef CONFIG_FRAME_POINTER
- unsigned long *bp;
+ unsigned long *bp, *orig_sp;
+ struct pt_regs *regs;
#else
unsigned long *sp;
#endif
@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
if (unwind_done(state))
return NULL;
- return state->bp + 1;
+ return state->regs ? &state->regs->ip : state->bp + 1;
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return NULL;
+
+ return state->regs;
}
#else /* !CONFIG_FRAME_POINTER */
@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
return NULL;
}
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+ return NULL;
+}
+
#endif /* CONFIG_FRAME_POINTER */
#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index e728699db774..3a01996db58f 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
* works on all CPUs. This is volatile so that it orders
* correctly wrt barrier() and to keep gcc from cleverly
* hoisting it out of the calling function.
+ *
+ * If RDPID is available, use it.
*/
- asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+ alternative_io ("lsl %[p],%[seg]",
+ ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+ X86_FEATURE_RDPID,
+ [p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
return p;
}
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a002b07a7099..2b5b2d4b924e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,7 @@
#define VMX_H
+#include <linux/bitops.h>
#include <linux/types.h>
#include <uapi/asm/vmx.h>
@@ -60,6 +61,7 @@
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
@@ -110,6 +112,36 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
+{
+ return vmx_basic & GENMASK_ULL(30, 0);
+}
+
+static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
+{
+ return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
+}
+
+static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
+{
+ return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+}
+
+static inline int vmx_misc_cr3_count(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(24, 16)) >> 16;
+}
+
+static inline int vmx_misc_max_msr(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(27, 25)) >> 25;
+}
+
+static inline int vmx_misc_mseg_revid(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(63, 32)) >> 32;
+}
+
/* VMCS Encodings */
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
@@ -399,10 +431,11 @@ enum vmcs_field {
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
#define VMX_NR_VPIDS (1 << 16)
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
#define VMX_VPID_EXTENT_ALL_CONTEXT 2
+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL 3
-#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_EPT_EXTENT_CONTEXT 1
#define VMX_EPT_EXTENT_GLOBAL 2
#define VMX_EPT_EXTENT_SHIFT 24
@@ -419,8 +452,10 @@ enum vmcs_field {
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
#define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT (1ull << 8) /* (40 - 32) */
#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */
#define VMX_EPT_DEFAULT_GAW 3
#define VMX_EPT_MAX_GAW 0x4
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 6ba793178441..7ba7e90a9ad6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -59,7 +59,7 @@ struct x86_init_irqs {
/**
* struct x86_init_oem - oem platform specific customizing functions
- * @arch_setup: platform specific architecure setup
+ * @arch_setup: platform specific architecture setup
* @banner: print a platform specific banner
*/
struct x86_init_oem {
@@ -165,8 +165,25 @@ struct x86_legacy_devices {
};
/**
+ * enum x86_legacy_i8042_state - i8042 keyboard controller state
+ * @X86_LEGACY_I8042_PLATFORM_ABSENT: the controller is always absent on
+ * given platform/subarch.
+ * @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller
+ * is absent.
+ * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be
+ * present, the i8042 driver should probe for controller existence.
+ */
+enum x86_legacy_i8042_state {
+ X86_LEGACY_I8042_PLATFORM_ABSENT,
+ X86_LEGACY_I8042_FIRMWARE_ABSENT,
+ X86_LEGACY_I8042_EXPECTED_PRESENT,
+};
+
+/**
* struct x86_legacy_features - legacy x86 features
*
+ * @i8042: indicated if we expect the device to have i8042 controller
+ * present.
* @rtc: this device has a CMOS real-time clock present
* @reserve_bios_regions: boot code will search for the EBDA address and the
* start of the 640k - 1M BIOS region. If false, the platform must
@@ -175,6 +192,7 @@ struct x86_legacy_devices {
* documentation for further details.
*/
struct x86_legacy_features {
+ enum x86_legacy_i8042_state i8042;
int rtc;
int reserve_bios_regions;
struct x86_legacy_devices devices;
@@ -188,15 +206,14 @@ struct x86_legacy_features {
* @set_wallclock: set time back to HW clock
* @is_untracked_pat_range exclude from PAT logic
* @nmi_init enable NMI on cpus
- * @i8042_detect pre-detect if i8042 controller exists
* @save_sched_clock_state: save state for sched_clock() on suspend
* @restore_sched_clock_state: restore state for sched_clock() on resume
- * @apic_post_init: adjust apic if neeeded
+ * @apic_post_init: adjust apic if needed
* @legacy: legacy features
* @set_legacy_features: override legacy features. Use of this callback
* is highly discouraged. You should only need
* this if your hardware platform requires further
- * custom fine tuning far beyong what may be
+ * custom fine tuning far beyond what may be
* possible in x86_early_init_platform_quirks() by
* only using the current x86_hardware_subarch
* semantics.
@@ -210,7 +227,6 @@ struct x86_platform_ops {
bool (*is_untracked_pat_range)(u64 start, u64 end);
void (*nmi_init)(void);
unsigned char (*get_nmi_reason)(void);
- int (*i8042_detect)(void);
void (*save_sched_clock_state)(void);
void (*restore_sched_clock_state)(void);
void (*apic_post_init)(void);
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index c18ce67495fa..b10bf319ed20 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,6 +7,7 @@
#define SETUP_DTB 2
#define SETUP_PCI 3
#define SETUP_EFI 4
+#define SETUP_APPLE_PROPERTIES 5
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 94dc8ca434e0..1421a6585126 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -45,7 +45,9 @@ struct kvm_steal_time {
__u64 steal;
__u32 version;
__u32 flags;
- __u32 pad[12];
+ __u8 preempted;
+ __u8 u8_pad[3];
+ __u32 pad[11];
};
#define KVM_STEAL_ALIGNMENT_BITS 5
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 69a6e07e3149..eb6247a7009b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -28,6 +28,7 @@ struct mce {
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
+ __u64 ppin; /* Protected Processor Inventory Number */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index ae135de547f5..835aa51c7f6e 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,10 +6,8 @@
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
-#ifdef CONFIG_CHECKPOINT_RESTORE
-# define ARCH_MAP_VDSO_X32 0x2001
-# define ARCH_MAP_VDSO_32 0x2002
-# define ARCH_MAP_VDSO_64 0x2003
-#endif
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 37fee272618f..14458658e988 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,8 @@
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_GDTR_IDTR 46
+#define EXIT_REASON_LDTR_TR 47
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
@@ -113,6 +115,8 @@
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_INVEPT, "INVEPT" }, \
@@ -129,6 +133,7 @@
{ EXIT_REASON_XRSTORS, "XRSTORS" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
+#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */