path: root/arch/x86/include
Diffstat
-rw-r--r--  arch/x86/include/asm/acenv.h | 14
-rw-r--r--  arch/x86/include/asm/acrn.h | 14
-rw-r--r--  arch/x86/include/asm/alternative.h | 2
-rw-r--r--  arch/x86/include/asm/amd-ibs.h | 34
-rw-r--r--  arch/x86/include/asm/amd_hsmp.h | 16
-rw-r--r--  arch/x86/include/asm/amd_nb.h | 2
-rw-r--r--  arch/x86/include/asm/apic.h | 9
-rw-r--r--  arch/x86/include/asm/apicdef.h | 10
-rw-r--r--  arch/x86/include/asm/archrandom.h | 55
-rw-r--r--  arch/x86/include/asm/asm.h | 37
-rw-r--r--  arch/x86/include/asm/barrier.h | 10
-rw-r--r--  arch/x86/include/asm/bitops.h | 99
-rw-r--r--  arch/x86/include/asm/bootparam_utils.h | 1
-rw-r--r--  arch/x86/include/asm/bug.h | 23
-rw-r--r--  arch/x86/include/asm/cfi.h | 22
-rw-r--r--  arch/x86/include/asm/checksum.h | 16
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 21
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h | 6
-rw-r--r--  arch/x86/include/asm/coco.h | 32
-rw-r--r--  arch/x86/include/asm/compat.h | 110
-rw-r--r--  arch/x86/include/asm/cpu.h | 28
-rw-r--r--  arch/x86/include/asm/cpu_entry_area.h | 2
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 24
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 36
-rw-r--r--  arch/x86/include/asm/cpuid.h | 34
-rw-r--r--  arch/x86/include/asm/cpumask.h | 10
-rw-r--r--  arch/x86/include/asm/disabled-features.h | 44
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 12
-rw-r--r--  arch/x86/include/asm/dma.h | 8
-rw-r--r--  arch/x86/include/asm/efi.h | 46
-rw-r--r--  arch/x86/include/asm/elf.h | 15
-rw-r--r--  arch/x86/include/asm/entry-common.h | 4
-rw-r--r--  arch/x86/include/asm/extable.h | 14
-rw-r--r--  arch/x86/include/asm/extable_fixup_types.h | 61
-rw-r--r--  arch/x86/include/asm/fpu/api.h | 16
-rw-r--r--  arch/x86/include/asm/fpu/internal.h | 0
-rw-r--r--  arch/x86/include/asm/fpu/sched.h | 2
-rw-r--r--  arch/x86/include/asm/fpu/signal.h | 3
-rw-r--r--  arch/x86/include/asm/fpu/types.h | 32
-rw-r--r--  arch/x86/include/asm/ftrace.h | 8
-rw-r--r--  arch/x86/include/asm/futex.h | 28
-rw-r--r--  arch/x86/include/asm/gart.h | 5
-rw-r--r--  arch/x86/include/asm/highmem.h | 1
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h | 5
-rw-r--r--  arch/x86/include/asm/hyperv-tlfs.h | 55
-rw-r--r--  arch/x86/include/asm/ibt.h | 116
-rw-r--r--  arch/x86/include/asm/idtentry.h | 29
-rw-r--r--  arch/x86/include/asm/insn-eval.h | 15
-rw-r--r--  arch/x86/include/asm/insn.h | 2
-rw-r--r--  arch/x86/include/asm/intel-family.h | 19
-rw-r--r--  arch/x86/include/asm/intel_ds.h | 5
-rw-r--r--  arch/x86/include/asm/intel_pt.h | 2
-rw-r--r--  arch/x86/include/asm/io.h | 73
-rw-r--r--  arch/x86/include/asm/iommu.h | 12
-rw-r--r--  arch/x86/include/asm/iommu_table.h | 102
-rw-r--r--  arch/x86/include/asm/irq_stack.h | 6
-rw-r--r--  arch/x86/include/asm/irqflags.h | 10
-rw-r--r--  arch/x86/include/asm/jump_label.h | 6
-rw-r--r--  arch/x86/include/asm/kexec.h | 20
-rw-r--r--  arch/x86/include/asm/kmsan.h | 87
-rw-r--r--  arch/x86/include/asm/kprobes.h | 2
-rw-r--r--  arch/x86/include/asm/kvm-x86-ops.h | 129
-rw-r--r--  arch/x86/include/asm/kvm-x86-pmu-ops.h | 31
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 498
-rw-r--r--  arch/x86/include/asm/kvm_page_track.h | 6
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 22
-rw-r--r--  arch/x86/include/asm/linkage.h | 65
-rw-r--r--  arch/x86/include/asm/livepatch.h | 20
-rw-r--r--  arch/x86/include/asm/mc146818rtc.h | 2
-rw-r--r--  arch/x86/include/asm/mce.h | 29
-rw-r--r--  arch/x86/include/asm/mem_encrypt.h | 8
-rw-r--r--  arch/x86/include/asm/microcode.h | 6
-rw-r--r--  arch/x86/include/asm/microcode_amd.h | 2
-rw-r--r--  arch/x86/include/asm/microcode_intel.h | 4
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 2
-rw-r--r--  arch/x86/include/asm/mmx.h | 15
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 13
-rw-r--r--  arch/x86/include/asm/msi.h | 19
-rw-r--r--  arch/x86/include/asm/msr-index.h | 137
-rw-r--r--  arch/x86/include/asm/msr.h | 37
-rw-r--r--  arch/x86/include/asm/mtrr.h | 8
-rw-r--r--  arch/x86/include/asm/mwait.h | 1
-rw-r--r--  arch/x86/include/asm/nmi.h | 1
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 186
-rw-r--r--  arch/x86/include/asm/page.h | 10
-rw-r--r--  arch/x86/include/asm/page_32.h | 14
-rw-r--r--  arch/x86/include/asm/page_64.h | 10
-rw-r--r--  arch/x86/include/asm/paravirt.h | 9
-rw-r--r--  arch/x86/include/asm/paravirt_api_clock.h | 1
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 33
-rw-r--r--  arch/x86/include/asm/pci.h | 5
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 24
-rw-r--r--  arch/x86/include/asm/percpu.h | 6
-rw-r--r--  arch/x86/include/asm/perf_event.h | 78
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable.h | 87
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 52
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 25
-rw-r--r--  arch/x86/include/asm/pkeys.h | 8
-rw-r--r--  arch/x86/include/asm/pkru.h | 4
-rw-r--r--  arch/x86/include/asm/preempt.h | 10
-rw-r--r--  arch/x86/include/asm/processor.h | 19
-rw-r--r--  arch/x86/include/asm/proto.h | 8
-rw-r--r--  arch/x86/include/asm/ptrace.h | 6
-rw-r--r--  arch/x86/include/asm/qspinlock.h | 1
-rw-r--r--  arch/x86/include/asm/qspinlock_paravirt.h | 20
-rw-r--r--  arch/x86/include/asm/realmode.h | 2
-rw-r--r--  arch/x86/include/asm/required-features.h | 4
-rw-r--r--  arch/x86/include/asm/resctrl.h | 9
-rw-r--r--  arch/x86/include/asm/rmwcc.h | 6
-rw-r--r--  arch/x86/include/asm/segment.h | 26
-rw-r--r--  arch/x86/include/asm/set_memory.h | 52
-rw-r--r--  arch/x86/include/asm/setup.h | 51
-rw-r--r--  arch/x86/include/asm/sev-common.h | 137
-rw-r--r--  arch/x86/include/asm/sev.h | 144
-rw-r--r--  arch/x86/include/asm/sgx.h | 26
-rw-r--r--  arch/x86/include/asm/shared/io.h | 34
-rw-r--r--  arch/x86/include/asm/shared/msr.h | 15
-rw-r--r--  arch/x86/include/asm/shared/tdx.h | 40
-rw-r--r--  arch/x86/include/asm/smap.h | 24
-rw-r--r--  arch/x86/include/asm/smp.h | 25
-rw-r--r--  arch/x86/include/asm/sparsemem.h | 2
-rw-r--r--  arch/x86/include/asm/spec-ctrl.h | 10
-rw-r--r--  arch/x86/include/asm/special_insns.h | 16
-rw-r--r--  arch/x86/include/asm/static_call.h | 24
-rw-r--r--  arch/x86/include/asm/string_32.h | 33
-rw-r--r--  arch/x86/include/asm/string_64.h | 26
-rw-r--r--  arch/x86/include/asm/suspend_32.h | 2
-rw-r--r--  arch/x86/include/asm/suspend_64.h | 12
-rw-r--r--  arch/x86/include/asm/svm.h | 233
-rw-r--r--  arch/x86/include/asm/swiotlb.h | 30
-rw-r--r--  arch/x86/include/asm/switch_to.h | 8
-rw-r--r--  arch/x86/include/asm/syscall_wrapper.h | 8
-rw-r--r--  arch/x86/include/asm/tdx.h | 91
-rw-r--r--  arch/x86/include/asm/text-patching.h | 32
-rw-r--r--  arch/x86/include/asm/thread_info.h | 4
-rw-r--r--  arch/x86/include/asm/timex.h | 9
-rw-r--r--  arch/x86/include/asm/tlb.h | 3
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 103
-rw-r--r--  arch/x86/include/asm/topology.h | 15
-rw-r--r--  arch/x86/include/asm/traps.h | 4
-rw-r--r--  arch/x86/include/asm/tsc.h | 7
-rw-r--r--  arch/x86/include/asm/uaccess.h | 252
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 45
-rw-r--r--  arch/x86/include/asm/unistd.h | 1
-rw-r--r--  arch/x86/include/asm/unwind.h | 23
-rw-r--r--  arch/x86/include/asm/unwind_hints.h | 14
-rw-r--r--  arch/x86/include/asm/user_32.h | 4
-rw-r--r--  arch/x86/include/asm/user_64.h | 4
-rw-r--r--  arch/x86/include/asm/vdso.h | 2
-rw-r--r--  arch/x86/include/asm/vmx.h | 30
-rw-r--r--  arch/x86/include/asm/vmxfeatures.h | 6
-rw-r--r--  arch/x86/include/asm/word-at-a-time.h | 26
-rw-r--r--  arch/x86/include/asm/x86_init.h | 22
-rw-r--r--  arch/x86/include/asm/xen/cpuid.h | 9
-rw-r--r--  arch/x86/include/asm/xen/events.h | 3
-rw-r--r--  arch/x86/include/asm/xen/hypervisor.h | 14
-rw-r--r--  arch/x86/include/asm/xen/page-coherent.h | 24
-rw-r--r--  arch/x86/include/asm/xen/page.h | 22
-rw-r--r--  arch/x86/include/asm/xen/swiotlb-xen.h | 8
-rw-r--r--  arch/x86/include/asm/xor.h | 42
-rw-r--r--  arch/x86/include/asm/xor_32.h | 42
-rw-r--r--  arch/x86/include/asm/xor_avx.h | 21
-rw-r--r--  arch/x86/include/uapi/asm/amd_hsmp.h | 307
-rw-r--r--  arch/x86/include/uapi/asm/bootparam.h | 21
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 50
-rw-r--r--  arch/x86/include/uapi/asm/mman.h | 14
-rw-r--r--  arch/x86/include/uapi/asm/prctl.h | 26
-rw-r--r--  arch/x86/include/uapi/asm/processor-flags.h | 2
-rw-r--r--  arch/x86/include/uapi/asm/sgx.h | 62
-rw-r--r--  arch/x86/include/uapi/asm/shmbuf.h | 6
-rw-r--r--  arch/x86/include/uapi/asm/signal.h | 2
-rw-r--r--  arch/x86/include/uapi/asm/svm.h | 13
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h | 4
175 files changed, 3965 insertions(+), 1638 deletions(-)
diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
index 9aff97f0de7f..d937c55e717e 100644
--- a/arch/x86/include/asm/acenv.h
+++ b/arch/x86/include/asm/acenv.h
@@ -13,7 +13,19 @@
/* Asm macros */
-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+/*
+ * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states.
+ * It is required to prevent data loss.
+ *
+ * While running inside virtual machine, the kernel can bypass cache flushing.
+ * Changing sleep state in a virtual machine doesn't affect the host system
+ * sleep state and cannot lead to data loss.
+ */
+#define ACPI_FLUSH_CPU_CACHE() \
+do { \
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \
+ wbinvd(); \
+} while (0)
int __acpi_acquire_global_lock(unsigned int *lock);
int __acpi_release_global_lock(unsigned int *lock);
diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h
index e003a01b7c67..1dd14381bcb6 100644
--- a/arch/x86/include/asm/acrn.h
+++ b/arch/x86/include/asm/acrn.h
@@ -10,6 +10,15 @@
/* Bit 0 indicates whether guest VM is privileged */
#define ACRN_FEATURE_PRIVILEGED_VM BIT(0)
+/*
+ * Timing Information.
+ * This leaf returns the current TSC frequency in kHz.
+ *
+ * EAX: (Virtual) TSC frequency in kHz.
+ * EBX, ECX, EDX: RESERVED (reserved fields are set to zero).
+ */
+#define ACRN_CPUID_TIMING_INFO 0x40000010
+
void acrn_setup_intr_handler(void (*handler)(void));
void acrn_remove_intr_handler(void);
@@ -21,6 +30,11 @@ static inline u32 acrn_cpuid_base(void)
return 0;
}
+static inline unsigned long acrn_get_tsc_khz(void)
+{
+ return cpuid_eax(ACRN_CPUID_TIMING_INFO);
+}
+
/*
* Hypercalls for ACRN
*
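The new ACRN_CPUID_TIMING_INFO leaf and acrn_get_tsc_khz() above let a guest read its TSC frequency straight from the hypervisor instead of calibrating it. A minimal sketch of how guest platform setup could consume the helper, assuming the usual x86_platform calibration hooks (the setup function name is illustrative):

/* Illustrative: use the paravirtualized TSC frequency for calibration. */
static void __init acrn_setup_timing(void)
{
	x86_platform.calibrate_tsc = acrn_get_tsc_khz;
	x86_platform.calibrate_cpu = acrn_get_tsc_khz;
}
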
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 58eee6402832..9542c582d546 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -76,6 +76,8 @@ extern int alternatives_patched;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
+extern void apply_returns(s32 *start, s32 *end);
+extern void apply_ibt_endbr(s32 *start, s32 *end);
struct module;
diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h
index 46e1df45efc0..cb2a5e113daa 100644
--- a/arch/x86/include/asm/amd-ibs.h
+++ b/arch/x86/include/asm/amd-ibs.h
@@ -6,6 +6,22 @@
#include <asm/msr-index.h>
+/* IBS_OP_DATA2 DataSrc */
+#define IBS_DATA_SRC_LOC_CACHE 2
+#define IBS_DATA_SRC_DRAM 3
+#define IBS_DATA_SRC_REM_CACHE 4
+#define IBS_DATA_SRC_IO 7
+
+/* IBS_OP_DATA2 DataSrc Extension */
+#define IBS_DATA_SRC_EXT_LOC_CACHE 1
+#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2
+#define IBS_DATA_SRC_EXT_DRAM 3
+#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5
+#define IBS_DATA_SRC_EXT_PMEM 6
+#define IBS_DATA_SRC_EXT_IO 7
+#define IBS_DATA_SRC_EXT_EXT_MEM 8
+#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12
+
/*
* IBS Hardware MSRs
*/
@@ -29,7 +45,10 @@ union ibs_fetch_ctl {
rand_en:1, /* 57: random tagging enable */
fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
* (needs IbsFetchComp) */
- reserved:5; /* 59-63: reserved */
+ l3_miss_only:1, /* 59: Collect L3 miss samples only */
+ fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */
+ fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */
+ reserved:2; /* 62-63: reserved */
};
};
@@ -38,18 +57,18 @@ union ibs_op_ctl {
__u64 val;
struct {
__u64 opmaxcnt:16, /* 0-15: periodic op max. count */
- reserved0:1, /* 16: reserved */
+ l3_miss_only:1, /* 16: Collect L3 miss samples only */
op_en:1, /* 17: op sampling enable */
op_val:1, /* 18: op sample valid */
cnt_ctl:1, /* 19: periodic op counter control */
opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
- reserved1:5, /* 27-31: reserved */
+ reserved0:5, /* 27-31: reserved */
opcurcnt:27, /* 32-58: periodic op counter current count */
- reserved2:5; /* 59-63: reserved */
+ reserved1:5; /* 59-63: reserved */
};
};
-/* MSR 0xc0011035: IBS Op Data 2 */
+/* MSR 0xc0011035: IBS Op Data 1 */
union ibs_op_data {
__u64 val;
struct {
@@ -71,11 +90,12 @@ union ibs_op_data {
union ibs_op_data2 {
__u64 val;
struct {
- __u64 data_src:3, /* 0-2: data source */
+ __u64 data_src_lo:3, /* 0-2: data source low */
reserved0:1, /* 3: reserved */
rmt_node:1, /* 4: destination node */
cache_hit_st:1, /* 5: cache hit state */
- reserved1:57; /* 5-63: reserved */
+ data_src_hi:2, /* 6-7: data source high */
+ reserved1:56; /* 8-63: reserved */
};
};
diff --git a/arch/x86/include/asm/amd_hsmp.h b/arch/x86/include/asm/amd_hsmp.h
new file mode 100644
index 000000000000..03c2ce3edaf5
--- /dev/null
+++ b/arch/x86/include/asm/amd_hsmp.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _ASM_X86_AMD_HSMP_H_
+#define _ASM_X86_AMD_HSMP_H_
+
+#include <uapi/asm/amd_hsmp.h>
+
+#if IS_ENABLED(CONFIG_AMD_HSMP)
+int hsmp_send_message(struct hsmp_message *msg);
+#else
+static inline int hsmp_send_message(struct hsmp_message *msg)
+{
+ return -ENODEV;
+}
+#endif
+#endif /*_ASM_X86_AMD_HSMP_H_*/
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 455066a06f60..ed0eaf65c437 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -16,7 +16,6 @@ extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
extern bool early_is_amd_nb(u32 value);
extern struct resource *amd_get_mmconfig_range(struct resource *res);
-extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
@@ -24,7 +23,6 @@ extern int amd_set_subcaches(int, unsigned long);
extern int amd_smn_read(u16 node, u32 address, u32 *value);
extern int amd_smn_write(u16 node, u32 address, u32 value);
-extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
struct amd_l3_cache {
unsigned indices;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 48067af94678..3415321c8240 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -98,8 +98,6 @@ static inline bool apic_from_smp_config(void)
#include <asm/paravirt.h>
#endif
-extern int setup_profiling_timer(unsigned int);
-
static inline void native_apic_mem_write(u32 reg, u32 v)
{
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
@@ -328,6 +326,8 @@ struct apic {
/* wakeup_secondary_cpu */
int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
+ /* wakeup secondary CPU using 64-bit wakeup point */
+ int (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip);
void (*inquire_remote_apic)(int apicid);
@@ -488,6 +488,11 @@ static inline unsigned int read_apic_id(void)
return apic->get_apic_id(reg);
}
+#ifdef CONFIG_X86_64
+typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip);
+extern void acpi_wake_cpu_handler_update(wakeup_cpu_handler handler);
+#endif
+
extern int default_apic_id_valid(u32 apicid);
extern int default_acpi_madt_oem_check(char *, char *);
extern void default_setup_apic_routing(void);
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 5716f22f81ac..68d213e83fcc 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -89,18 +89,12 @@
#define APIC_DM_EXTINT 0x00700
#define APIC_VECTOR_MASK 0x000FF
#define APIC_ICR2 0x310
-#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF)
-#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+#define GET_XAPIC_DEST_FIELD(x) (((x) >> 24) & 0xFF)
+#define SET_XAPIC_DEST_FIELD(x) ((x) << 24)
#define APIC_LVTT 0x320
#define APIC_LVTTHMR 0x330
#define APIC_LVTPC 0x340
#define APIC_LVT0 0x350
-#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18)
-#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3)
-#define SET_APIC_TIMER_BASE(x) (((x) << 18))
-#define APIC_TIMER_BASE_CLKIN 0x0
-#define APIC_TIMER_BASE_TMBASE 0x1
-#define APIC_TIMER_BASE_DIV 0x2
#define APIC_LVT_TIMER_ONESHOT (0 << 17)
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index ebc248e49549..02bae8e0758b 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -31,20 +31,6 @@ static inline bool __must_check rdrand_long(unsigned long *v)
return false;
}
-static inline bool __must_check rdrand_int(unsigned int *v)
-{
- bool ok;
- unsigned int retry = RDRAND_RETRY_LOOPS;
- do {
- asm volatile("rdrand %[out]"
- CC_SET(c)
- : CC_OUT(c) (ok), [out] "=r" (*v));
- if (ok)
- return true;
- } while (--retry);
- return false;
-}
-
static inline bool __must_check rdseed_long(unsigned long *v)
{
bool ok;
@@ -54,48 +40,23 @@ static inline bool __must_check rdseed_long(unsigned long *v)
return ok;
}
-static inline bool __must_check rdseed_int(unsigned int *v)
-{
- bool ok;
- asm volatile("rdseed %[out]"
- CC_SET(c)
- : CC_OUT(c) (ok), [out] "=r" (*v));
- return ok;
-}
-
/*
* These are the generic interfaces; they must not be declared if the
- * stubs in <linux/random.h> are to be invoked,
- * i.e. CONFIG_ARCH_RANDOM is not defined.
+ * stubs in <linux/random.h> are to be invoked.
*/
-#ifdef CONFIG_ARCH_RANDOM
-static inline bool __must_check arch_get_random_long(unsigned long *v)
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
{
- return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false;
+ return max_longs && static_cpu_has(X86_FEATURE_RDRAND) && rdrand_long(v) ? 1 : 0;
}
-static inline bool __must_check arch_get_random_int(unsigned int *v)
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
{
- return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false;
+ return max_longs && static_cpu_has(X86_FEATURE_RDSEED) && rdseed_long(v) ? 1 : 0;
}
-static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
-{
- return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false;
-}
-
-static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
-{
- return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false;
-}
-
-extern void x86_init_rdrand(struct cpuinfo_x86 *c);
-
-#else /* !CONFIG_ARCH_RANDOM */
-
-static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { }
-
-#endif /* !CONFIG_ARCH_RANDOM */
+#ifndef CONFIG_UML
+void x86_init_rdrand(struct cpuinfo_x86 *c);
+#endif
#endif /* ASM_X86_ARCHRANDOM_H */
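With the rdrand_int()/rdseed_int() helpers gone, the single-long arch_get_random_*() interfaces are replaced by batch variants that return how many longs were actually filled. A hedged sketch of the calling convention (the buffer-filling helper below is made up for illustration):

/* Illustrative: gather up to nlongs words of hardware randomness. */
static size_t fill_hw_random(unsigned long *buf, size_t nlongs)
{
	size_t filled = 0;

	while (filled < nlongs) {
		size_t n = arch_get_random_seed_longs(buf + filled,
						      nlongs - filled);
		if (!n)
			break;	/* caller mixes in other entropy sources */
		filled += n;
	}
	return filled;
}
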
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3a168483bc8e..fbcfec4dc4cc 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -152,6 +152,33 @@
#else /* ! __ASSEMBLY__ */
+# define DEFINE_EXTABLE_TYPE_REG \
+ ".macro extable_type_reg type:req reg:req\n" \
+ ".set .Lfound, 0\n" \
+ ".set .Lregnr, 0\n" \
+ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\reg, %%\\rs\n" \
+ ".set .Lfound, .Lfound+1\n" \
+ ".long \\type + (.Lregnr << 8)\n" \
+ ".endif\n" \
+ ".set .Lregnr, .Lregnr+1\n" \
+ ".endr\n" \
+ ".set .Lregnr, 0\n" \
+ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \
+ ".ifc \\reg, %%\\rs\n" \
+ ".set .Lfound, .Lfound+1\n" \
+ ".long \\type + (.Lregnr << 8)\n" \
+ ".endif\n" \
+ ".set .Lregnr, .Lregnr+1\n" \
+ ".endr\n" \
+ ".if (.Lfound != 1)\n" \
+ ".error \"extable_type_reg: bad register argument\"\n" \
+ ".endif\n" \
+ ".endm\n"
+
+# define UNDEFINE_EXTABLE_TYPE_REG \
+ ".purgem extable_type_reg\n"
+
# define _ASM_EXTABLE_TYPE(from, to, type) \
" .pushsection \"__ex_table\",\"a\"\n" \
" .balign 4\n" \
@@ -160,6 +187,16 @@
" .long " __stringify(type) " \n" \
" .popsection\n"
+# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+ " .balign 4\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - .\n" \
+ DEFINE_EXTABLE_TYPE_REG \
+ "extable_type_reg reg=" __stringify(reg) ", type=" __stringify(type) " \n"\
+ UNDEFINE_EXTABLE_TYPE_REG \
+ " .popsection\n"
+
/* For C file, we already have NOKPROBE_SYMBOL macro */
/*
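_ASM_EXTABLE_TYPE_REG() above records, alongside the fixup type, the number of the register named by an asm operand, so the exception-table handler knows where to deposit an error code. A rough sketch of how an MSR-style helper might attach it; the EX_TYPE_RDMSR_SAFE type comes from the extable_fixup_types.h changes in this series, and the exact error value written by the fixup is an assumption:

/* Illustrative: read an MSR, reporting a #GP through the %[err] register. */
static inline int rdmsr_try(unsigned int msr, u64 *val)
{
	u32 low, high;
	int err = 0;

	asm volatile("1: rdmsr\n"
		     "2:\n"
		     /* On #GP the fixup writes an error code into the
		      * register backing %[err] and resumes at label 2. */
		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
		     : [err] "+r" (err), "=a" (low), "=d" (high)
		     : "c" (msr));

	*val = ((u64)high << 32) | low;
	return err;
}
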
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 3ba772a69cc8..35389b2af88e 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -19,9 +19,9 @@
#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
#else
-#define mb() asm volatile("mfence":::"memory")
-#define rmb() asm volatile("lfence":::"memory")
-#define wmb() asm volatile("sfence" ::: "memory")
+#define __mb() asm volatile("mfence":::"memory")
+#define __rmb() asm volatile("lfence":::"memory")
+#define __wmb() asm volatile("sfence" ::: "memory")
#endif
/**
@@ -51,8 +51,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
/* Prevent speculative execution past this barrier. */
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
-#define dma_rmb() barrier()
-#define dma_wmb() barrier()
+#define __dma_rmb() barrier()
+#define __dma_wmb() barrier()
#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 0367efdc5b7a..2edf68475fec 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -63,7 +63,7 @@ arch_set_bit(long nr, volatile unsigned long *addr)
}
static __always_inline void
-arch___set_bit(long nr, volatile unsigned long *addr)
+arch___set_bit(unsigned long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
@@ -89,7 +89,7 @@ arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
}
static __always_inline void
-arch___clear_bit(long nr, volatile unsigned long *addr)
+arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
@@ -114,7 +114,7 @@ arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
}
static __always_inline void
-arch___change_bit(long nr, volatile unsigned long *addr)
+arch___change_bit(unsigned long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
@@ -145,7 +145,7 @@ arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
}
static __always_inline bool
-arch___test_and_set_bit(long nr, volatile unsigned long *addr)
+arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -171,7 +171,7 @@ arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
* this without also updating arch/x86/kernel/kvm.c
*/
static __always_inline bool
-arch___test_and_clear_bit(long nr, volatile unsigned long *addr)
+arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -183,7 +183,7 @@ arch___test_and_clear_bit(long nr, volatile unsigned long *addr)
}
static __always_inline bool
-arch___test_and_change_bit(long nr, volatile unsigned long *addr)
+arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
{
bool oldbit;
@@ -207,6 +207,20 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
+static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr)
+{
+ bool oldbit;
+
+ asm volatile("testb %2,%1"
+ CC_SET(nz)
+ : CC_OUT(nz) (oldbit)
+ : "m" (((unsigned char *)addr)[nr >> 3]),
+ "i" (1 << (nr & 7))
+ :"memory");
+
+ return oldbit;
+}
+
static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
{
bool oldbit;
@@ -219,10 +233,27 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
return oldbit;
}
-#define arch_test_bit(nr, addr) \
- (__builtin_constant_p((nr)) \
- ? constant_test_bit((nr), (addr)) \
- : variable_test_bit((nr), (addr)))
+static __always_inline bool
+arch_test_bit(unsigned long nr, const volatile unsigned long *addr)
+{
+ return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) :
+ variable_test_bit(nr, addr);
+}
+
+static __always_inline bool
+arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+ return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) :
+ variable_test_bit(nr, addr);
+}
+
+static __always_inline unsigned long variable__ffs(unsigned long word)
+{
+ asm("rep; bsf %1,%0"
+ : "=r" (word)
+ : "rm" (word));
+ return word;
+}
/**
* __ffs - find first set bit in word
@@ -230,11 +261,16 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __ffs(unsigned long word)
+#define __ffs(word) \
+ (__builtin_constant_p(word) ? \
+ (unsigned long)__builtin_ctzl(word) : \
+ variable__ffs(word))
+
+static __always_inline unsigned long variable_ffz(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
- : "rm" (word));
+ : "r" (~word));
return word;
}
@@ -244,13 +280,10 @@ static __always_inline unsigned long __ffs(unsigned long word)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
-static __always_inline unsigned long ffz(unsigned long word)
-{
- asm("rep; bsf %1,%0"
- : "=r" (word)
- : "r" (~word));
- return word;
-}
+#define ffz(word) \
+ (__builtin_constant_p(word) ? \
+ (unsigned long)__builtin_ctzl(~word) : \
+ variable_ffz(word))
/*
* __fls: find last set bit in word
@@ -269,18 +302,7 @@ static __always_inline unsigned long __fls(unsigned long word)
#undef ADDR
#ifdef __KERNEL__
-/**
- * ffs - find first set bit in word
- * @x: the word to search
- *
- * This is defined the same way as the libc and compiler builtin ffs
- * routines, therefore differs in spirit from the other bitops.
- *
- * ffs(value) returns 0 if value is 0 or the position of the first
- * set bit if value is nonzero. The first (least significant) bit
- * is at position 1.
- */
-static __always_inline int ffs(int x)
+static __always_inline int variable_ffs(int x)
{
int r;
@@ -311,6 +333,19 @@ static __always_inline int ffs(int x)
}
/**
+ * ffs - find first set bit in word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs
+ * routines, therefore differs in spirit from the other bitops.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first
+ * set bit if value is nonzero. The first (least significant) bit
+ * is at position 1.
+ */
+#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
+
+/**
* fls - find last set bit in word
* @x: the word to search
*
@@ -380,8 +415,6 @@ static __always_inline int fls64(__u64 x)
#include <asm-generic/bitops/fls64.h>
#endif
-#include <asm-generic/bitops/find.h>
-
#include <asm-generic/bitops/sched.h>
#include <asm/arch_hweight.h>
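The __ffs()/ffz()/ffs() rework above lets the compiler evaluate constant arguments with __builtin_ctzl()/__builtin_ffs() at build time instead of always emitting a BSF instruction. The dispatch pattern is easy to demonstrate outside the kernel; a small stand-alone sketch (names are illustrative, not the kernel's):

#include <stdio.h>

static inline unsigned long variable_ctz(unsigned long word)
{
	asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word));
	return word;
}

/* Constant input folds at compile time; variable input costs one insn. */
#define first_set_bit(word)					\
	(__builtin_constant_p(word) ?				\
		(unsigned long)__builtin_ctzl(word) :		\
		variable_ctz(word))

int main(void)
{
	unsigned long x = 0x1000;

	printf("%lu %lu\n", first_set_bit(0x80UL), first_set_bit(x));
	return 0;
}
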
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 981fe923a59f..53e9b0620d96 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -74,6 +74,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
BOOT_PARAM_PRESERVE(hdr),
BOOT_PARAM_PRESERVE(e820_table),
BOOT_PARAM_PRESERVE(eddbuf),
+ BOOT_PARAM_PRESERVE(cc_blob_address),
};
memset(&scratch, 0, sizeof(scratch));
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 84b87538a15d..a3ec87d198ac 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -4,6 +4,7 @@
#include <linux/stringify.h>
#include <linux/instrumentation.h>
+#include <linux/objtool.h>
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
@@ -17,12 +18,12 @@
#ifdef CONFIG_X86_32
# define __BUG_REL(val) ".long " __stringify(val)
#else
-# define __BUG_REL(val) ".long " __stringify(val) " - 2b"
+# define __BUG_REL(val) ".long " __stringify(val) " - ."
#endif
#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
@@ -31,7 +32,8 @@ do { \
"\t.word %c1" "\t# bug_entry::line\n" \
"\t.word %c2" "\t# bug_entry::flags\n" \
"\t.org 2b+%c3\n" \
- ".popsection" \
+ ".popsection\n" \
+ extra \
: : "i" (__FILE__), "i" (__LINE__), \
"i" (flags), \
"i" (sizeof(struct bug_entry))); \
@@ -39,14 +41,15 @@ do { \
#else /* !CONFIG_DEBUG_BUGVERBOSE */
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
"\t.word %c0" "\t# bug_entry::flags\n" \
"\t.org 2b+%c1\n" \
- ".popsection" \
+ ".popsection\n" \
+ extra \
: : "i" (flags), \
"i" (sizeof(struct bug_entry))); \
} while (0)
@@ -55,7 +58,7 @@ do { \
#else
-#define _BUG_FLAGS(ins, flags) asm volatile(ins)
+#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins)
#endif /* CONFIG_GENERIC_BUG */
@@ -63,8 +66,8 @@ do { \
#define BUG() \
do { \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, 0); \
- unreachable(); \
+ _BUG_FLAGS(ASM_UD2, 0, ""); \
+ __builtin_unreachable(); \
} while (0)
/*
@@ -75,9 +78,9 @@ do { \
*/
#define __WARN_FLAGS(flags) \
do { \
+ __auto_type __flags = BUGFLAG_WARNING|(flags); \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
- annotate_reachable(); \
+ _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \
instrumentation_end(); \
} while (0)
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
new file mode 100644
index 000000000000..58dacd90daef
--- /dev/null
+++ b/arch/x86/include/asm/cfi.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CFI_H
+#define _ASM_X86_CFI_H
+
+/*
+ * Clang Control Flow Integrity (CFI) support.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+
+#include <linux/cfi.h>
+
+#ifdef CONFIG_CFI_CLANG
+enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#else
+static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
+{
+ return BUG_TRAP_TYPE_NONE;
+}
+#endif /* CONFIG_CFI_CLANG */
+
+#endif /* _ASM_X86_CFI_H */
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index bca625a60186..6df6ece8a28e 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,9 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
-#define HAVE_CSUM_COPY_USER
-#define _HAVE_ARCH_CSUM_AND_COPY
-#ifdef CONFIG_X86_32
-# include <asm/checksum_32.h>
+#ifdef CONFIG_GENERIC_CSUM
+# include <asm-generic/checksum.h>
#else
-# include <asm/checksum_64.h>
+# define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
+# define HAVE_CSUM_COPY_USER
+# define _HAVE_ARCH_CSUM_AND_COPY
+# ifdef CONFIG_X86_32
+# include <asm/checksum_32.h>
+# else
+# include <asm/checksum_64.h>
+# endif
#endif
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 0a7fe0321613..215f5a65790f 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -42,6 +42,9 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
#define arch_cmpxchg64_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
+#define arch_try_cmpxchg64(ptr, po, n) \
+ __try_cmpxchg64((ptr), (unsigned long long *)(po), \
+ (unsigned long long)(n))
#endif
static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
@@ -70,6 +73,24 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
return prev;
}
+static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
+{
+ bool success;
+ u64 old = *pold;
+ asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
+ CC_SET(z)
+ : CC_OUT(z) (success),
+ [ptr] "+m" (*ptr),
+ "+A" (old)
+ : "b" ((u32)new),
+ "c" ((u32)(new >> 32))
+ : "memory");
+
+ if (unlikely(!success))
+ *pold = old;
+ return success;
+}
+
#ifndef CONFIG_X86_CMPXCHG64
/*
* Building a kernel capable running on 80386 and 80486. It may be necessary
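__try_cmpxchg64() above returns a success flag and, on failure, writes the observed value back through *pold, so compare-exchange loops no longer need an explicit re-read. A minimal sketch of the usual loop shape, assuming the generic try_cmpxchg64() wrapper in <linux/atomic.h> sits on top of arch_try_cmpxchg64():

/* Illustrative: lock-free 64-bit accumulate built on try_cmpxchg64(). */
static void counter_add(u64 *counter, u64 delta)
{
	u64 old = READ_ONCE(*counter);

	/* On failure, old already holds the freshly observed value. */
	do {
	} while (!try_cmpxchg64(counter, &old, old + delta));
}
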
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 072e5459fe2f..250187ac8248 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -19,6 +19,12 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
arch_cmpxchg_local((ptr), (o), (n)); \
})
+#define arch_try_cmpxchg64(ptr, po, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_try_cmpxchg((ptr), (po), (n)); \
+})
+
#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
new file mode 100644
index 000000000000..3d98c3a60d34
--- /dev/null
+++ b/arch/x86/include/asm/coco.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_COCO_H
+#define _ASM_X86_COCO_H
+
+#include <asm/types.h>
+
+enum cc_vendor {
+ CC_VENDOR_NONE,
+ CC_VENDOR_AMD,
+ CC_VENDOR_HYPERV,
+ CC_VENDOR_INTEL,
+};
+
+void cc_set_vendor(enum cc_vendor v);
+void cc_set_mask(u64 mask);
+
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+u64 cc_mkenc(u64 val);
+u64 cc_mkdec(u64 val);
+#else
+static inline u64 cc_mkenc(u64 val)
+{
+ return val;
+}
+
+static inline u64 cc_mkdec(u64 val)
+{
+ return val;
+}
+#endif
+
+#endif /* _ASM_X86_COCO_H */
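cc_mkenc()/cc_mkdec() above apply or strip the platform's confidential-computing attribute on a page-table value, abstracting over whether "decrypted" means setting a shared bit (TDX) or clearing the C-bit (SEV). A minimal sketch of marking a mapping as shared with the host, with simplified pgprot handling and an illustrative helper name:

/* Illustrative: derive a host-shared (decrypted) pgprot from a base one. */
static inline pgprot_t make_shared_prot(pgprot_t prot)
{
	return __pgprot(cc_mkdec(pgprot_val(prot)));
}
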
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 7516e4199b3c..b1221da477b7 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -15,28 +15,32 @@
#define compat_mode_t compat_mode_t
typedef u16 compat_mode_t;
+#define __compat_uid_t __compat_uid_t
+typedef u16 __compat_uid_t;
+typedef u16 __compat_gid_t;
+
+#define compat_dev_t compat_dev_t
+typedef u16 compat_dev_t;
+
+#define compat_ipc_pid_t compat_ipc_pid_t
+typedef u16 compat_ipc_pid_t;
+
+#define compat_statfs compat_statfs
+
#include <asm-generic/compat.h>
-#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "i686\0\0"
-typedef u16 __compat_uid_t;
-typedef u16 __compat_gid_t;
-typedef u16 compat_dev_t;
typedef u16 compat_nlink_t;
-typedef u16 compat_ipc_pid_t;
-typedef __kernel_fsid_t compat_fsid_t;
struct compat_stat {
- compat_dev_t st_dev;
- u16 __pad1;
+ u32 st_dev;
compat_ino_t st_ino;
compat_mode_t st_mode;
compat_nlink_t st_nlink;
__compat_uid_t st_uid;
__compat_gid_t st_gid;
- compat_dev_t st_rdev;
- u16 __pad2;
+ u32 st_rdev;
u32 st_size;
u32 st_blksize;
u32 st_blocks;
@@ -50,29 +54,11 @@ struct compat_stat {
u32 __unused5;
};
-struct compat_flock {
- short l_type;
- short l_whence;
- compat_off_t l_start;
- compat_off_t l_len;
- compat_pid_t l_pid;
-};
-
-#define F_GETLK64 12 /* using 'struct flock64' */
-#define F_SETLK64 13
-#define F_SETLKW64 14
-
/*
- * IA32 uses 4 byte alignment for 64 bit quantities,
- * so we need to pack this structure.
+ * IA32 uses 4 byte alignment for 64 bit quantities, so we need to pack the
+ * compat flock64 structure.
*/
-struct compat_flock64 {
- short l_type;
- short l_whence;
- compat_loff_t l_start;
- compat_loff_t l_len;
- compat_pid_t l_pid;
-} __attribute__((packed));
+#define __ARCH_NEED_COMPAT_FLOCK64_PACKED
struct compat_statfs {
int f_type;
@@ -89,68 +75,6 @@ struct compat_statfs {
int f_spare[4];
};
-#define COMPAT_RLIM_INFINITY 0xffffffff
-
-#define COMPAT_OFF_T_MAX 0x7fffffff
-
-struct compat_ipc64_perm {
- compat_key_t key;
- __compat_uid32_t uid;
- __compat_gid32_t gid;
- __compat_uid32_t cuid;
- __compat_gid32_t cgid;
- unsigned short mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned short __pad2;
- compat_ulong_t unused1;
- compat_ulong_t unused2;
-};
-
-struct compat_semid64_ds {
- struct compat_ipc64_perm sem_perm;
- compat_ulong_t sem_otime;
- compat_ulong_t sem_otime_high;
- compat_ulong_t sem_ctime;
- compat_ulong_t sem_ctime_high;
- compat_ulong_t sem_nsems;
- compat_ulong_t __unused3;
- compat_ulong_t __unused4;
-};
-
-struct compat_msqid64_ds {
- struct compat_ipc64_perm msg_perm;
- compat_ulong_t msg_stime;
- compat_ulong_t msg_stime_high;
- compat_ulong_t msg_rtime;
- compat_ulong_t msg_rtime_high;
- compat_ulong_t msg_ctime;
- compat_ulong_t msg_ctime_high;
- compat_ulong_t msg_cbytes;
- compat_ulong_t msg_qnum;
- compat_ulong_t msg_qbytes;
- compat_pid_t msg_lspid;
- compat_pid_t msg_lrpid;
- compat_ulong_t __unused4;
- compat_ulong_t __unused5;
-};
-
-struct compat_shmid64_ds {
- struct compat_ipc64_perm shm_perm;
- compat_size_t shm_segsz;
- compat_ulong_t shm_atime;
- compat_ulong_t shm_atime_high;
- compat_ulong_t shm_dtime;
- compat_ulong_t shm_dtime_high;
- compat_ulong_t shm_ctime;
- compat_ulong_t shm_ctime_high;
- compat_pid_t shm_cpid;
- compat_pid_t shm_lpid;
- compat_ulong_t shm_nattch;
- compat_ulong_t __unused4;
- compat_ulong_t __unused5;
-};
-
#ifdef CONFIG_X86_X32_ABI
#define COMPAT_USE_64BIT_TIME \
(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 33d41e350c79..b472ef76826a 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -7,6 +7,7 @@
#include <linux/topology.h>
#include <linux/nodemask.h>
#include <linux/percpu.h>
+#include <asm/ibt.h>
#ifdef CONFIG_SMP
@@ -35,6 +36,8 @@ extern int _debug_hotplug_cpu(int cpu, int action);
#endif
#endif
+extern void ap_init_aperfmperf(void);
+
int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig);
@@ -42,14 +45,12 @@ unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
#ifdef CONFIG_CPU_SUP_INTEL
extern void __init sld_setup(struct cpuinfo_x86 *c);
-extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip);
extern void handle_bus_lock(struct pt_regs *regs);
u8 get_this_hybrid_cpu_type(void);
#else
static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
-static inline void switch_to_sld(unsigned long tifn) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
return false;
@@ -72,4 +73,27 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
#else
static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {}
#endif
+
+extern __noendbr void cet_disable(void);
+
+struct ucode_cpu_info;
+
+int intel_cpu_collect_info(struct ucode_cpu_info *uci);
+
+static inline bool intel_cpu_signatures_match(unsigned int s1, unsigned int p1,
+ unsigned int s2, unsigned int p2)
+{
+ if (s1 != s2)
+ return false;
+
+ /* Processor flags are either both 0 ... */
+ if (!p1 && !p2)
+ return true;
+
+ /* ... or they intersect. */
+ return p1 & p2;
+}
+
+extern u64 x86_read_arch_cap_msr(void);
+
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index dd5ea1bdf04c..75efc4c6f076 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -143,7 +143,7 @@ extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
-static inline struct entry_stack *cpu_entry_stack(int cpu)
+static __always_inline struct entry_stack *cpu_entry_stack(int cpu)
{
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1261842d006c..1a85e1fb0922 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -34,14 +34,17 @@ enum cpuid_leafs
CPUID_8000_001F_EAX,
};
+#define X86_CAP_FMT_NUM "%d:%d"
+#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31)
+
#ifdef CONFIG_X86_FEATURE_NAMES
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
#define X86_CAP_FMT "%s"
#define x86_cap_flag(flag) x86_cap_flags[flag]
#else
-#define X86_CAP_FMT "%d:%d"
-#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
+#define X86_CAP_FMT X86_CAP_FMT_NUM
+#define x86_cap_flag x86_cap_flag_num
#endif
/*
@@ -51,7 +54,7 @@ extern const char * const x86_power_flags[32];
extern const char * const x86_bug_flags[NBUGINTS*32];
#define test_cpu_cap(c, bit) \
- test_bit(bit, (unsigned long *)((c)->x86_capability))
+ arch_test_bit(bit, (unsigned long *)((c)->x86_capability))
/*
* There are 32 bits/features in each mask word. The high bits
@@ -152,20 +155,6 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
-#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO)
-
-/*
- * Workaround for the sake of BPF compilation which utilizes kernel
- * headers, but clang does not support ASM GOTO and fails the build.
- */
-#ifndef __BPF_TRACING__
-#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
-#endif
-
-#define static_cpu_has(bit) boot_cpu_has(bit)
-
-#else
-
/*
* Static testing of CPU features. Used the same as boot_cpu_has(). It
* statically patches the target code for additional performance. Use
@@ -205,7 +194,6 @@ t_no:
boot_cpu_has(bit) : \
_static_cpu_has(bit) \
)
-#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index d5b5f2ab87a0..b71f4f2ecdd5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-/* FREE! ( 3*32+17) */
+#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -201,17 +201,17 @@
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-/* FREE! ( 7*32+10) */
+#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-/* FREE! ( 7*32+20) */
+#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
@@ -219,7 +219,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */
+#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
@@ -238,6 +238,7 @@
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -295,11 +296,18 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
+#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -315,6 +323,9 @@
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -327,6 +338,7 @@
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
@@ -342,6 +354,7 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
@@ -387,7 +400,11 @@
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
@@ -400,6 +417,7 @@
#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
/*
@@ -438,5 +456,9 @@
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
new file mode 100644
index 000000000000..70b2db18165e
--- /dev/null
+++ b/arch/x86/include/asm/cpuid.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CPUID-related helpers/definitions
+ *
+ * Derived from arch/x86/kvm/cpuid.c
+ */
+
+#ifndef _ASM_X86_CPUID_H
+#define _ASM_X86_CPUID_H
+
+static __always_inline bool cpuid_function_is_indexed(u32 function)
+{
+ switch (function) {
+ case 4:
+ case 7:
+ case 0xb:
+ case 0xd:
+ case 0xf:
+ case 0x10:
+ case 0x12:
+ case 0x14:
+ case 0x17:
+ case 0x18:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
+ case 0x8000001d:
+ return true;
+ }
+
+ return false;
+}
+
+#endif /* _ASM_X86_CPUID_H */
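cpuid_function_is_indexed() above flags the leaves whose output depends on the sub-leaf passed in ECX. A small sketch of how a generic reader might use it, assuming the existing cpuid()/cpuid_count() helpers from <asm/processor.h>:

/* Illustrative: query a leaf, passing the sub-leaf only where it matters. */
static void read_cpuid_leaf(u32 leaf, u32 subleaf,
			    u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
{
	if (cpuid_function_is_indexed(leaf))
		cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
	else
		cpuid(leaf, eax, ebx, ecx, edx);
}
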
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 3afa990d756b..c5aed9e9226c 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -20,11 +20,21 @@ static __always_inline bool arch_cpu_online(int cpu)
{
return arch_test_bit(cpu, cpumask_bits(cpu_online_mask));
}
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+ arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
#else
static __always_inline bool arch_cpu_online(int cpu)
{
return cpu == 0;
}
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+ return;
+}
#endif
#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu))
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 8f28fafa98b3..33d2cd04d254 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -10,12 +10,6 @@
* cpu_feature_enabled().
*/
-#ifdef CONFIG_X86_SMAP
-# define DISABLE_SMAP 0
-#else
-# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31))
-#endif
-
#ifdef CONFIG_X86_UMIP
# define DISABLE_UMIP 0
#else
@@ -56,8 +50,30 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-/* Force disable because it's broken beyond repair */
-#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#ifdef CONFIG_RETPOLINE
+# define DISABLE_RETPOLINE 0
+#else
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
+#endif
+
+#ifdef CONFIG_RETHUNK
+# define DISABLE_RETHUNK 0
+#else
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
+#endif
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+# define DISABLE_UNRET 0
+#else
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+#endif
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+# define DISABLE_ENQCMD 0
+#else
+# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#endif
#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0
@@ -65,6 +81,12 @@
# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
#endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+# define DISABLE_TDX_GUEST 0
+#else
+# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -76,10 +98,10 @@
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
+#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
+#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index bb1654fe0ce7..1c66708e3062 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -2,18 +2,6 @@
#ifndef _ASM_X86_DMA_MAPPING_H
#define _ASM_X86_DMA_MAPPING_H
-/*
- * IOMMU interface. See Documentation/core-api/dma-api-howto.rst and
- * Documentation/core-api/dma-api.rst for documentation.
- */
-
-#include <linux/scatterlist.h>
-#include <asm/io.h>
-#include <asm/swiotlb.h>
-
-extern int iommu_merge;
-extern int panic_on_overflow;
-
extern const struct dma_map_ops *dma_ops;
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 8e95aa4b0d17..8ae6e0e11b8b 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -307,12 +307,4 @@ extern int request_dma(unsigned int dmanr, const char *device_id);
extern void free_dma(unsigned int dmanr);
#endif
-/* From PCI */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
#endif /* _ASM_X86_DMA_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 4d0b126835b8..233ae6986d6f 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -7,6 +7,7 @@
#include <asm/tlb.h>
#include <asm/nospec-branch.h>
#include <asm/mmu_context.h>
+#include <asm/ibt.h>
#include <linux/build_bug.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>
@@ -46,13 +47,14 @@ extern unsigned long efi_mixed_mode_stack_pa;
#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__)
#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \
+ __efi_arg_sentinel(9), __efi_arg_sentinel(8), \
__efi_arg_sentinel(7), __efi_arg_sentinel(6), \
__efi_arg_sentinel(5), __efi_arg_sentinel(4), \
__efi_arg_sentinel(3), __efi_arg_sentinel(2), \
__efi_arg_sentinel(1), __efi_arg_sentinel(0))
-#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...) \
+#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, ...) \
__take_second_arg(n, \
- ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; }))
+ ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 10; }))
#define __efi_arg_sentinel(n) , n
/*
@@ -98,8 +100,6 @@ static inline void efi_fpu_end(void)
efi_fpu_end(); \
})
-#define arch_efi_call_virt(p, f, args...) p->f(args)
-
#else /* !CONFIG_X86_32 */
#define EFI_LOADER_SIGNATURE "EL64"
@@ -119,8 +119,13 @@ extern asmlinkage u64 __efi_call(void *fp, ...);
efi_enter_mm(); \
})
-#define arch_efi_call_virt(p, f, args...) \
- efi_call((void *)p->f, args) \
+#undef arch_efi_call_virt
+#define arch_efi_call_virt(p, f, args...) ({ \
+ u64 ret, ibt = ibt_save(); \
+ ret = efi_call((void *)p->f, args); \
+ ibt_restore(ibt); \
+ ret; \
+})
#define arch_efi_call_virt_teardown() \
({ \
@@ -176,8 +181,9 @@ extern u64 efi_setup;
extern efi_status_t __efi64_thunk(u32, ...);
#define efi64_thunk(...) ({ \
- __efi_nargs_check(efi64_thunk, 6, __VA_ARGS__); \
- __efi64_thunk(__VA_ARGS__); \
+ u64 __pad[3]; /* must have space for 3 args on the stack */ \
+ __efi_nargs_check(efi64_thunk, 9, __VA_ARGS__); \
+ __efi64_thunk(__VA_ARGS__, __pad); \
})
static inline bool efi_is_mixed(void)
@@ -197,8 +203,6 @@ static inline bool efi_runtime_supported(void)
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
-
extern void efi_thunk_runtime_setup(void);
efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
unsigned long descriptor_size,
@@ -265,6 +269,8 @@ static inline u32 efi64_convert_status(efi_status_t status)
return (u32)(status | (u64)status >> 32);
}
+#define __efi64_split(val) (val) & U32_MAX, (u64)(val) >> 32
+
#define __efi64_argmap_free_pages(addr, size) \
((addr), 0, (size))
@@ -308,6 +314,17 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_query_mode(gop, mode, size, info) \
((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info))
+/* TCG2 protocol */
+#define __efi64_argmap_hash_log_extend_event(prot, fl, addr, size, ev) \
+ ((prot), (fl), 0ULL, (u64)(addr), 0ULL, (u64)(size), 0ULL, ev)
+
+/* DXE services */
+#define __efi64_argmap_get_memory_space_descriptor(phys, desc) \
+ (__efi64_split(phys), (desc))
+
+#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \
+ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+
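
A worked expansion may help here (hypothetical values): __efi64_split() emits
the low 32 bits first and the high 32 bits second, which is how the 32-bit
thunk expects a native u64 argument to arrive, so a mixed-mode
set_memory_space_attributes() call roughly becomes (ignoring the __pad
plumbing shown earlier):

	u64 phys = 0x123456789ULL;
	/* __efi64_split(phys) expands to: 0x23456789, 0x1 */

	__efi64_thunk(func,
		      phys  & U32_MAX, (u64)phys  >> 32,
		      size  & U32_MAX, (u64)size  >> 32,
		      flags & U32_MAX, (u64)flags >> 32);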
/*
* The macros below handle the plumbing for the argument mapping. To add a
* mapping for a specific EFI method, simply define a macro
@@ -348,6 +365,11 @@ static inline u32 efi64_convert_status(efi_status_t status)
runtime), \
func, __VA_ARGS__))
+#define efi_dxe_call(func, ...) \
+ (efi_is_native() \
+ ? efi_dxe_table->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_dxe_table, func, __VA_ARGS__))
+
#else /* CONFIG_EFI_MIXED */
static inline bool efi_is_64bit(void)
@@ -360,7 +382,6 @@ static inline bool efi_is_64bit(void)
extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr);
-extern void efi_find_mirror(void);
extern void efi_reserve_boot_services(void);
#else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
@@ -372,9 +393,6 @@ static inline bool efi_is_table_address(unsigned long phys_addr)
{
return false;
}
-static inline void efi_find_mirror(void)
-{
-}
static inline void efi_reserve_boot_services(void)
{
}
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 29fea180a665..cb0ff1055ab1 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -116,7 +116,7 @@ extern unsigned int vdso32_enabled;
* now struct_user_regs, they are different)
*/
-#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \
+#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
pr_reg[0] = regs->bx; \
pr_reg[1] = regs->cx; \
@@ -128,6 +128,7 @@ do { \
pr_reg[7] = regs->ds; \
pr_reg[8] = regs->es; \
pr_reg[9] = regs->fs; \
+ savesegment(gs, pr_reg[10]); \
pr_reg[11] = regs->orig_ax; \
pr_reg[12] = regs->ip; \
pr_reg[13] = regs->cs; \
@@ -136,18 +137,6 @@ do { \
pr_reg[16] = regs->ss; \
} while (0);
-#define ELF_CORE_COPY_REGS(pr_reg, regs) \
-do { \
- ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
- pr_reg[10] = get_user_gs(regs); \
-} while (0);
-
-#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \
-do { \
- ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
- savesegment(gs, pr_reg[10]); \
-} while (0);
-
#define ELF_PLATFORM (utsname()->machine)
#define set_personality_64bit() do { } while (0)
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 43184640b579..674ed46d3ced 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -10,7 +10,7 @@
#include <asm/fpu/api.h>
/* Check that the stack and regs on entry from user mode are sane. */
-static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
/*
@@ -42,7 +42,7 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
WARN_ON_ONCE(regs != task_pt_regs(current));
}
}
-#define arch_check_user_regs arch_check_user_regs
+#define arch_enter_from_user_mode arch_enter_from_user_mode
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index 93f400eb728f..eeed395c3177 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -21,7 +21,7 @@
*/
struct exception_table_entry {
- int insn, fixup, type;
+ int insn, fixup, data;
};
struct pt_regs;
@@ -31,8 +31,8 @@ struct pt_regs;
do { \
(a)->fixup = (b)->fixup + (delta); \
(b)->fixup = (tmp).fixup - (delta); \
- (a)->type = (b)->type; \
- (b)->type = (tmp).type; \
+ (a)->data = (b)->data; \
+ (b)->data = (tmp).data; \
} while (0)
extern int fixup_exception(struct pt_regs *regs, int trapnr,
@@ -42,9 +42,13 @@ extern int ex_get_fixup_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#ifdef CONFIG_X86_MCE
-extern void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr);
+extern void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr);
#else
-static inline void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { }
+static inline void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
+{
+ for (;;)
+ cpu_relax();
+}
#endif
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64)
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
index 409524d5d2eb..991e31cfde94 100644
--- a/arch/x86/include/asm/extable_fixup_types.h
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -2,6 +2,36 @@
#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H
#define _ASM_X86_EXTABLE_FIXUP_TYPES_H
+/*
+ * Our IMM is signed, as such it must live at the top end of the word. Also,
+ * since C99 hex constants are of ambiguous type, force cast the mask to 'int'
+ * so that FIELD_GET() will DTRT and sign extend the value when it extracts it.
+ */
+#define EX_DATA_TYPE_MASK ((int)0x000000FF)
+#define EX_DATA_REG_MASK ((int)0x00000F00)
+#define EX_DATA_FLAG_MASK ((int)0x0000F000)
+#define EX_DATA_IMM_MASK ((int)0xFFFF0000)
+
+#define EX_DATA_REG_SHIFT 8
+#define EX_DATA_FLAG_SHIFT 12
+#define EX_DATA_IMM_SHIFT 16
+
+#define EX_DATA_REG(reg) ((reg) << EX_DATA_REG_SHIFT)
+#define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT)
+#define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT)
+
+/* segment regs */
+#define EX_REG_DS EX_DATA_REG(8)
+#define EX_REG_ES EX_DATA_REG(9)
+#define EX_REG_FS EX_DATA_REG(10)
+#define EX_REG_GS EX_DATA_REG(11)
+
+/* flags */
+#define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1)
+#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2)
+#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3)
+
+/* types */
#define EX_TYPE_NONE 0
#define EX_TYPE_DEFAULT 1
#define EX_TYPE_FAULT 2
@@ -9,14 +39,31 @@
#define EX_TYPE_COPY 4
#define EX_TYPE_CLEAR_FS 5
#define EX_TYPE_FPU_RESTORE 6
-#define EX_TYPE_WRMSR 7
-#define EX_TYPE_RDMSR 8
-#define EX_TYPE_BPF 9
+#define EX_TYPE_BPF 7
+#define EX_TYPE_WRMSR 8
+#define EX_TYPE_RDMSR 9
+#define EX_TYPE_WRMSR_SAFE 10 /* reg := -EIO */
+#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */
+#define EX_TYPE_WRMSR_IN_MCE 12
+#define EX_TYPE_RDMSR_IN_MCE 13
+#define EX_TYPE_DEFAULT_MCE_SAFE 14
+#define EX_TYPE_FAULT_MCE_SAFE 15
+
+#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */
+#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0))
+
+#define EX_TYPE_IMM_REG 17 /* reg := (long)imm */
+#define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT))
+#define EX_TYPE_ZERO_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(0))
+#define EX_TYPE_ONE_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(1))
+
+#define EX_TYPE_FAULT_SGX 18
-#define EX_TYPE_WRMSR_IN_MCE 10
-#define EX_TYPE_RDMSR_IN_MCE 11
+#define EX_TYPE_UCOPY_LEN 19 /* cx := reg + imm*cx */
+#define EX_TYPE_UCOPY_LEN1 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(1))
+#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4))
+#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8))
-#define EX_TYPE_DEFAULT_MCE_SAFE 12
-#define EX_TYPE_FAULT_MCE_SAFE 13
+#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */
#endif
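
The packing can be seen in a stand-alone sketch (plain C that mirrors, but does
not use, the macros above): EX_TYPE_EFAULT_REG puts type 17 in the low byte and
-EFAULT (-14) in the signed 16-bit immediate, and the handler gets both back
with a mask plus an arithmetic right shift, which is what the (int) casts on
the masks buy FIELD_GET(). This relies on the two's-complement and
arithmetic-shift behaviour the kernel already assumes:

#include <stdio.h>

int main(void)
{
	/* EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT), built by hand; 0xFFF2 == (u16)-14 */
	int data = (int)(17u | (0xFFF2u << 16));

	int type = data & 0xff;		/* EX_DATA_TYPE_MASK */
	int imm  = data >> 16;		/* sign-extends the immediate */

	printf("type=%d imm=%d\n", type, imm);	/* type=17 imm=-14 */
	return 0;
}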
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index c2767a6a387e..503a577814b2 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -132,10 +132,21 @@ static inline void fpstate_free(struct fpu *fpu) { }
/* fpstate-related functions which are exported to KVM */
extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
+extern u64 xstate_get_guest_group_perm(void);
+
/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
+extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures);
+
+#ifdef CONFIG_X86_64
+extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd);
+extern void fpu_sync_guest_vmexit_xfd_state(void);
+#else
+static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { }
+static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
+#endif
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
@@ -151,7 +162,8 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
}
/* prctl */
-struct task_struct;
-extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2);
+extern long fpu_xstate_prctl(int option, unsigned long arg2);
+
+extern void fpu_idle_fpregs(void);
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/arch/x86/include/asm/fpu/internal.h
+++ /dev/null
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index 99a8820e8cc4..b2486b2cbc6e 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -11,7 +11,7 @@
extern void save_fpregs_to_fpstate(struct fpu *fpu);
extern void fpu__drop(struct fpu *fpu);
-extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags);
+extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal);
extern void fpu_flush_thread(void);
/*
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 22b0273a8bf1..e1c9df9102a5 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -41,7 +41,4 @@ extern void fpu__clear_user_states(struct fpu *fpu);
extern bool fpu__restore_sig(void __user *buf, int ia32_frame);
extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask);
-
-extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
-
#endif /* _ASM_X86_FPU_SIGNAL_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c06c82ab355..eb7cd1139d97 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -387,6 +387,8 @@ struct fpstate {
/* @regs is dynamically sized! Don't add anything after @regs! */
} __aligned(64);
+#define FPU_GUEST_PERM_LOCKED BIT_ULL(63)
+
struct fpu_state_perm {
/*
* @__state_perm:
@@ -477,6 +479,13 @@ struct fpu {
struct fpu_state_perm perm;
/*
+ * @guest_perm:
+ *
+ * Permission related information for guest pseudo FPUs
+ */
+ struct fpu_state_perm guest_perm;
+
+ /*
* @__fpstate:
*
* Initial in-memory storage for FPU registers which are saved in
@@ -496,6 +505,29 @@ struct fpu {
*/
struct fpu_guest {
/*
+ * @xfeatures: xfeature bitmap of features which are
+ * currently enabled for the guest vCPU.
+ */
+ u64 xfeatures;
+
+ /*
+ * @perm: xfeature bitmap of features which are
+ * permitted to be enabled for the guest
+ * vCPU.
+ */
+ u64 perm;
+
+ /*
+ * @xfd_err: Save the guest value.
+ */
+ u64 xfd_err;
+
+ /*
+ * @uabi_size: Size required for save/restore
+ */
+ unsigned int uabi_size;
+
+ /*
* @fpstate: Pointer to the allocated guest fpstate
*/
struct fpstate *fpstate;
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 024d9797646e..908d99b127d3 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -9,6 +9,13 @@
# define MCOUNT_ADDR ((unsigned long)(__fentry__))
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
+/* Ignore unused weak functions which will have non zero offsets */
+#ifdef CONFIG_HAVE_FENTRY
+# include <asm/ibt.h>
+/* Add offset for endbr64 if IBT enabled */
+# define FTRACE_MCOUNT_MAX_OFFSET ENDBR_INSN_SIZE
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
@@ -16,7 +23,6 @@
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
#ifndef __ASSEMBLY__
-extern atomic_t modifying_ftrace_code;
extern void __fentry__(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index f9c00110a69a..99d345b686fa 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -17,13 +17,9 @@ do { \
int oldval = 0, ret; \
asm volatile("1:\t" insn "\n" \
"2:\n" \
- "\t.section .fixup,\"ax\"\n" \
- "3:\tmov\t%3, %1\n" \
- "\tjmp\t2b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
- : "i" (-EFAULT), "0" (oparg), "1" (0)); \
+ : "0" (oparg), "1" (0)); \
if (ret) \
goto label; \
*oval = oldval; \
@@ -39,15 +35,11 @@ do { \
"3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
"\tjnz\t2b\n" \
"4:\n" \
- "\t.section .fixup,\"ax\"\n" \
- "5:\tmov\t%5, %1\n" \
- "\tjmp\t4b\n" \
- "\t.previous\n" \
- _ASM_EXTABLE_UA(1b, 5b) \
- _ASM_EXTABLE_UA(3b, 5b) \
+ _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \
+ _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \
: "=&a" (oldval), "=&r" (ret), \
"+m" (*uaddr), "=&r" (tem) \
- : "r" (oparg), "i" (-EFAULT), "1" (0)); \
+ : "r" (oparg), "1" (0)); \
if (ret) \
goto label; \
*oval = oldval; \
@@ -95,15 +87,11 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
asm volatile("\n"
- "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
+ "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
"2:\n"
- "\t.section .fixup, \"ax\"\n"
- "3:\tmov %3, %0\n"
- "\tjmp 2b\n"
- "\t.previous\n"
- _ASM_EXTABLE_UA(1b, 3b)
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \
: "+r" (ret), "=a" (oldval), "+m" (*uaddr)
- : "i" (-EFAULT), "r" (newval), "1" (oldval)
+ : "r" (newval), "1" (oldval)
: "memory"
);
user_access_end();
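
The same EX_TYPE_EFAULT_REG conversion is applied across the uaccess helpers;
as a sketch (kernel context assumed for _ASM_EXTABLE_TYPE_REG and __user;
read_user_u32() is a hypothetical helper, not part of the patch), a minimal
get_user-style read needs no out-of-line .fixup stub because the exception
handler itself writes -EFAULT into the register named in the extable data:

static inline int read_user_u32(u32 *val, const u32 __user *uaddr)
{
	int ret;

	asm volatile("1:	movl %[umem], %[out]\n"
		     "2:\n"
		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[err])
		     : [err] "=r" (ret), [out] "=r" (*val)
		     : [umem] "m" (*uaddr), "[err]" (0));
	return ret;
}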
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 318556574345..5af8088a10df 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -38,7 +38,7 @@ extern int gart_iommu_aperture_disabled;
extern void early_gart_iommu_check(void);
extern int gart_iommu_init(void);
extern void __init gart_parse_options(char *);
-extern int gart_iommu_hole_init(void);
+void gart_iommu_hole_init(void);
#else
#define gart_iommu_aperture 0
@@ -51,9 +51,8 @@ static inline void early_gart_iommu_check(void)
static inline void gart_parse_options(char *options)
{
}
-static inline int gart_iommu_hole_init(void)
+static inline void gart_iommu_hole_init(void)
{
- return -ENODEV;
}
#endif
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 032e020853aa..731ee7cc40a5 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -26,6 +26,7 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/fixmap.h>
+#include <asm/pgtable_areas.h>
/* declarations for highmem.c */
extern unsigned long highstart_pfn, highend_pfn;
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index a1f0e90d0818..0bc931cd0698 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -44,10 +44,7 @@ struct arch_hw_breakpoint {
/* Total number of available HW breakpoint registers */
#define HBP_NUM 4
-static inline int hw_breakpoint_slots(int type)
-{
- return HBP_NUM;
-}
+#define hw_breakpoint_slots(type) (HBP_NUM)
struct perf_event_attr;
struct perf_event;
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 381e88122a5f..3089ec352743 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -138,6 +138,9 @@
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
+/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */
+#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0)
+
/*
* This is specific to AMD and specifies that enlightened TLB flush is
* supported. If guest opts in to this feature, ASID invalidations only
@@ -546,7 +549,7 @@ struct hv_enlightened_vmcs {
u64 guest_rip;
u32 hv_clean_fields;
- u32 hv_padding_32;
+ u32 padding32_1;
u32 hv_synthetic_controls;
struct {
u32 nested_flush_hypercall:1;
@@ -554,14 +557,25 @@ struct hv_enlightened_vmcs {
u32 reserved:30;
} __packed hv_enlightenments_control;
u32 hv_vp_id;
-
+ u32 padding32_2;
u64 hv_vm_id;
u64 partition_assist_page;
u64 padding64_4[4];
u64 guest_bndcfgs;
- u64 padding64_5[7];
+ u64 guest_ia32_perf_global_ctrl;
+ u64 guest_ia32_s_cet;
+ u64 guest_ssp;
+ u64 guest_ia32_int_ssp_table_addr;
+ u64 guest_ia32_lbr_ctl;
+ u64 padding64_5[2];
u64 xss_exit_bitmap;
- u64 padding64_6[7];
+ u64 encls_exiting_bitmap;
+ u64 host_ia32_perf_global_ctrl;
+ u64 tsc_multiplier;
+ u64 host_ia32_s_cet;
+ u64 host_ssp;
+ u64 host_ia32_int_ssp_table_addr;
+ u64 padding64_6;
} __packed;
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
@@ -602,6 +616,39 @@ enum hv_interrupt_type {
HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A,
};
+union hv_msi_address_register {
+ u32 as_uint32;
+ struct {
+ u32 reserved1:2;
+ u32 destination_mode:1;
+ u32 redirection_hint:1;
+ u32 reserved2:8;
+ u32 destination_id:8;
+ u32 msi_base:12;
+ };
+} __packed;
+
+union hv_msi_data_register {
+ u32 as_uint32;
+ struct {
+ u32 vector:8;
+ u32 delivery_mode:3;
+ u32 reserved1:3;
+ u32 level_assert:1;
+ u32 trigger_mode:1;
+ u32 reserved2:16;
+ };
+} __packed;
+
+/* HvRetargetDeviceInterrupt hypercall */
+union hv_msi_entry {
+ u64 as_uint64;
+ struct {
+ union hv_msi_address_register address;
+ union hv_msi_data_register data;
+ } __packed;
+};
+
#include <asm-generic/hyperv-tlfs.h>
#endif
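
A small usage sketch for the new MSI plumbing (illustrative only;
compose_msi_entry() is a hypothetical helper): the address and data registers
are the architectural MSI address/data words viewed through the bitfields
above, so a caller preparing HvRetargetDeviceInterrupt can populate the entry
straight from an MSI message:

static union hv_msi_entry compose_msi_entry(u32 msi_address_lo, u32 msi_data)
{
	union hv_msi_entry entry = { .as_uint64 = 0 };

	entry.address.as_uint32 = msi_address_lo;	/* dest mode/id, msi_base */
	entry.data.as_uint32 = msi_data;		/* vector, delivery/trigger mode */

	return entry;
}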
diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h
new file mode 100644
index 000000000000..9b08082a5d9f
--- /dev/null
+++ b/arch/x86/include/asm/ibt.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IBT_H
+#define _ASM_X86_IBT_H
+
+#include <linux/types.h>
+
+/*
+ * The rules for enabling IBT are:
+ *
+ * - CC_HAS_IBT: the toolchain supports it
+ * - X86_KERNEL_IBT: it is selected in Kconfig
+ * - !__DISABLE_EXPORTS: this is regular kernel code
+ *
+ * Esp. that latter one is a bit non-obvious, but some code like compressed,
+ * purgatory, realmode etc.. is built with custom CFLAGS that do not include
+ * -fcf-protection=branch and things will go *bang*.
+ *
+ * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0.
+ */
+#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS)
+
+#define HAS_KERNEL_IBT 1
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_X86_64
+#define ASM_ENDBR "endbr64\n\t"
+#else
+#define ASM_ENDBR "endbr32\n\t"
+#endif
+
+#define __noendbr __attribute__((nocf_check))
+
+/*
+ * Create a dummy function pointer reference to prevent objtool from marking
+ * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by
+ * apply_ibt_endbr()).
+ */
+#define IBT_NOSEAL(fname) \
+ ".pushsection .discard.ibt_endbr_noseal\n\t" \
+ _ASM_PTR fname "\n\t" \
+ ".popsection\n\t"
+
+static inline __attribute_const__ u32 gen_endbr(void)
+{
+ u32 endbr;
+
+ /*
+ * Generate ENDBR64 in a way that is sure to not result in
+ * an ENDBR64 instruction as immediate.
+ */
+ asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t"
+ "not %[endbr]\n\t"
+ : [endbr] "=&r" (endbr) );
+
+ return endbr;
+}
+
+static inline __attribute_const__ u32 gen_endbr_poison(void)
+{
+ /*
+ * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
+ * will be unique to (former) ENDBR sites.
+ */
+ return 0x001f0f66; /* osp nopl (%rax) */
+}
+
+static inline bool is_endbr(u32 val)
+{
+ if (val == gen_endbr_poison())
+ return true;
+
+ val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
+ return val == gen_endbr();
+}
+
+extern __noendbr u64 ibt_save(void);
+extern __noendbr void ibt_restore(u64 save);
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_X86_64
+#define ENDBR endbr64
+#else
+#define ENDBR endbr32
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#else /* !IBT */
+
+#define HAS_KERNEL_IBT 0
+
+#ifndef __ASSEMBLY__
+
+#define ASM_ENDBR
+#define IBT_NOSEAL(name)
+
+#define __noendbr
+
+static inline bool is_endbr(u32 val) { return false; }
+
+static inline u64 ibt_save(void) { return 0; }
+static inline void ibt_restore(u64 save) { }
+
+#else /* __ASSEMBLY__ */
+
+#define ENDBR
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_X86_KERNEL_IBT */
+
+#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT)
+
+#endif /* _ASM_X86_IBT_H */
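
A sketch of how these helpers tend to be consumed (kernel context assumed;
skip_endbr() here is a hypothetical wrapper): code that wants the "real" entry
point of a function peeks at the first instruction word and steps over a
4-byte ENDBR when one is present; is_endbr() also accepting the poison value
means already-sealed sites are handled the same way:

static unsigned long skip_endbr(unsigned long addr)
{
	u32 word;

	if (!copy_from_kernel_nofault(&word, (void *)addr, sizeof(word)) &&
	    is_endbr(word))
		addr += ENDBR_INSN_SIZE;	/* 4 with IBT, 0 without */

	return addr;
}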
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 1345088e9902..72184b0b2219 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -5,6 +5,8 @@
/* Interrupts/Exceptions */
#include <asm/trapnr.h>
+#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT))
+
#ifndef __ASSEMBLY__
#include <linux/entry-common.h>
#include <linux/hardirq.h>
@@ -480,7 +482,7 @@ __visible noinstr void func(struct pt_regs *regs, \
/*
* ASM code to emit the common vector entry stubs where each stub is
- * packed into 8 bytes.
+ * packed into IDT_ALIGN bytes.
*
* Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because
* GCC treats the local vector variable as unsigned int and would expand
@@ -492,33 +494,33 @@ __visible noinstr void func(struct pt_regs *regs, \
* point is to mask off the bits above bit 7 because the push is sign
* extending.
*/
- .align 8
+ .align IDT_ALIGN
SYM_CODE_START(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept NR_EXTERNAL_VECTORS
UNWIND_HINT_IRET_REGS
0 :
+ ENDBR
.byte 0x6a, vector
jmp asm_common_interrupt
- nop
- /* Ensure that the above is 8 bytes max */
- . = 0b + 8
+ /* Ensure that the above is IDT_ALIGN bytes max */
+ .fill 0b + IDT_ALIGN - ., 1, 0xcc
vector = vector+1
.endr
SYM_CODE_END(irq_entries_start)
#ifdef CONFIG_X86_LOCAL_APIC
- .align 8
+ .align IDT_ALIGN
SYM_CODE_START(spurious_entries_start)
vector=FIRST_SYSTEM_VECTOR
.rept NR_SYSTEM_VECTORS
UNWIND_HINT_IRET_REGS
0 :
+ ENDBR
.byte 0x6a, vector
jmp asm_spurious_interrupt
- nop
- /* Ensure that the above is 8 bytes max */
- . = 0b + 8
+ /* Ensure that the above is IDT_ALIGN bytes max */
+ .fill 0b + IDT_ALIGN - ., 1, 0xcc
vector = vector+1
.endr
SYM_CODE_END(spurious_entries_start)
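
For the stub size arithmetic above: with IBT each stub is endbr64 (4 bytes) +
"push imm8" (2 bytes) + "jmp rel32" (5 bytes) = 11 bytes, padded with 0xcc up
to IDT_ALIGN = 16; without IBT it is 7 bytes padded to 8, matching the old
hard-coded ". = 0b + 8".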
@@ -615,6 +617,11 @@ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault);
DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault);
#endif
+/* #CP */
+#ifdef CONFIG_X86_KERNEL_IBT
+DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection);
+#endif
+
/* #VC */
#ifdef CONFIG_AMD_MEM_ENCRYPT
DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
@@ -625,6 +632,10 @@ DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap);
#endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+DECLARE_IDTENTRY(X86_TRAP_VE, exc_virtualization_exception);
+#endif
+
/* Device interrupts common/spurious */
DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt);
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 4ec3613551e3..f07faa61c7f3 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -15,10 +15,13 @@
#define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf)
#define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4))
+int pt_regs_offset(struct pt_regs *regs, int regno);
+
bool insn_has_rep_prefix(struct insn *insn);
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
+unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
int insn_get_code_seg_params(struct pt_regs *regs);
int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip);
@@ -29,4 +32,16 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs,
bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE], int buf_size);
+enum mmio_type {
+ MMIO_DECODE_FAILED,
+ MMIO_WRITE,
+ MMIO_WRITE_IMM,
+ MMIO_READ,
+ MMIO_READ_ZERO_EXTEND,
+ MMIO_READ_SIGN_EXTEND,
+ MMIO_MOVS,
+};
+
+enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
+
#endif /* _ASM_X86_INSN_EVAL_H */
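
A sketch of the intended consumer (kernel context assumed;
handle_mmio_write()/handle_mmio_read() are hypothetical emulation helpers): a
#VE- or #VC-style handler decodes the faulting instruction, asks
insn_decode_mmio() what kind of access it is and how wide, then resolves the
register operand with insn_get_modrm_reg_ptr():

static int emulate_mmio(struct insn *insn, struct pt_regs *regs)
{
	unsigned long *reg;
	int size;

	switch (insn_decode_mmio(insn, &size)) {
	case MMIO_WRITE:
		reg = insn_get_modrm_reg_ptr(insn, regs);
		if (!reg)
			return -EINVAL;
		return handle_mmio_write(*reg, size);	/* hypothetical */
	case MMIO_READ_ZERO_EXTEND:
		reg = insn_get_modrm_reg_ptr(insn, regs);
		if (!reg)
			return -EINVAL;
		*reg = 0;
		return handle_mmio_read(reg, size);	/* hypothetical */
	default:
		return -EINVAL;
	}
}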
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 05a6ab940f45..1b29f58f730f 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -124,7 +124,7 @@ struct insn {
#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
/* VEX bit fields */
-#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
+#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */
#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
#define X86_VEX2_M 1 /* VEX2.M always 1 */
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 048b6d5aff50..347707d459c6 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -26,6 +26,8 @@
* _G - parts with extra graphics on
* _X - regular server parts
* _D - micro server parts
+ * _N,_P - other mobile parts
+ * _S - other client parts
*
* Historical OPTDIFFs:
*
@@ -105,12 +107,23 @@
#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
+#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF
+
+#define INTEL_FAM6_GRANITERAPIDS_X 0xAD
+#define INTEL_FAM6_GRANITERAPIDS_D 0xAE
+
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
+#define INTEL_FAM6_ALDERLAKE_N 0xBE
#define INTEL_FAM6_RAPTORLAKE 0xB7
+#define INTEL_FAM6_RAPTORLAKE_P 0xBA
+#define INTEL_FAM6_RAPTORLAKE_S 0xBF
+
+#define INTEL_FAM6_METEORLAKE 0xAC
+#define INTEL_FAM6_METEORLAKE_L 0xAA
-/* "Small Core" Processors (Atom) */
+/* "Small Core" Processors (Atom/E-Core) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
@@ -137,6 +150,10 @@
#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
+#define INTEL_FAM6_SIERRAFOREST_X 0xAF
+
+#define INTEL_FAM6_GRANDRIDGE 0xB6
+
/* Xeon Phi */
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 8380c3ddd4b2..2f9eeb5c3069 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -7,8 +7,9 @@
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS 8
-#define MAX_FIXED_PEBS_EVENTS 4
+#define MAX_PEBS_EVENTS_FMT4 8
+#define MAX_PEBS_EVENTS 32
+#define MAX_FIXED_PEBS_EVENTS 16
/*
* A debug store configuration.
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index ebe8d2ea44fe..c796e9bc98b6 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -13,6 +13,8 @@ enum pt_capabilities {
PT_CAP_mtc,
PT_CAP_ptwrite,
PT_CAP_power_event_trace,
+ PT_CAP_event_trace,
+ PT_CAP_tnt_disable,
PT_CAP_topa_output,
PT_CAP_topa_multiple_entries,
PT_CAP_single_range_output,
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 5c6a4af0b911..e9025640f634 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -40,9 +40,11 @@
#include <linux/string.h>
#include <linux/compiler.h>
+#include <linux/cc_platform.h>
#include <asm/page.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable_types.h>
+#include <asm/shared/io.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -168,15 +170,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
#define isa_bus_to_virt phys_to_virt
/*
- * However PCI ones are not necessarily 1:1 and therefore these interfaces
- * are forbidden in portable PCI drivers.
- *
- * Allow them on x86 for legacy drivers, though.
- */
-#define virt_to_bus virt_to_phys
-#define bus_to_virt phys_to_virt
-
-/*
* The default ioremap() behavior is non-cached; if you need something
* else, you probably want one of the following.
*/
@@ -209,8 +202,6 @@ void __iomem *ioremap(resource_size_t offset, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
#define iounmap iounmap
-extern void set_iounmap_nonlazy(void);
-
#ifdef __KERNEL__
void memcpy_fromio(void *, const volatile void __iomem *, size_t);
@@ -256,53 +247,24 @@ static inline void slow_down_io(void)
#endif
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-#include <linux/jump_label.h>
-
-extern struct static_key_false sev_enable_key;
-static inline bool sev_key_active(void)
-{
- return static_branch_unlikely(&sev_enable_key);
-}
-
-#else /* !CONFIG_AMD_MEM_ENCRYPT */
-
-static inline bool sev_key_active(void) { return false; }
-
-#endif /* CONFIG_AMD_MEM_ENCRYPT */
-
#define BUILDIO(bwl, bw, type) \
-static inline void out##bwl(unsigned type value, int port) \
-{ \
- asm volatile("out" #bwl " %" #bw "0, %w1" \
- : : "a"(value), "Nd"(port)); \
-} \
- \
-static inline unsigned type in##bwl(int port) \
-{ \
- unsigned type value; \
- asm volatile("in" #bwl " %w1, %" #bw "0" \
- : "=a"(value) : "Nd"(port)); \
- return value; \
-} \
- \
-static inline void out##bwl##_p(unsigned type value, int port) \
+static inline void out##bwl##_p(type value, u16 port) \
{ \
out##bwl(value, port); \
slow_down_io(); \
} \
\
-static inline unsigned type in##bwl##_p(int port) \
+static inline type in##bwl##_p(u16 port) \
{ \
- unsigned type value = in##bwl(port); \
+ type value = in##bwl(port); \
slow_down_io(); \
return value; \
} \
\
-static inline void outs##bwl(int port, const void *addr, unsigned long count) \
+static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \
{ \
- if (sev_key_active()) { \
- unsigned type *value = (unsigned type *)addr; \
+ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \
+ type *value = (type *)addr; \
while (count) { \
out##bwl(*value, port); \
value++; \
@@ -315,10 +277,10 @@ static inline void outs##bwl(int port, const void *addr, unsigned long count) \
} \
} \
\
-static inline void ins##bwl(int port, void *addr, unsigned long count) \
+static inline void ins##bwl(u16 port, void *addr, unsigned long count) \
{ \
- if (sev_key_active()) { \
- unsigned type *value = (unsigned type *)addr; \
+ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \
+ type *value = (type *)addr; \
while (count) { \
*value = in##bwl(port); \
value++; \
@@ -331,13 +293,11 @@ static inline void ins##bwl(int port, void *addr, unsigned long count) \
} \
}
-BUILDIO(b, b, char)
-BUILDIO(w, w, short)
-BUILDIO(l, , int)
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l, , u32)
+#undef BUILDIO
-#define inb inb
-#define inw inw
-#define inl inl
#define inb_p inb_p
#define inw_p inw_p
#define inl_p inl_p
@@ -345,9 +305,6 @@ BUILDIO(l, , int)
#define insw insw
#define insl insl
-#define outb outb
-#define outw outw
-#define outl outl
#define outb_p outb_p
#define outw_p outw_p
#define outl_p outl_p
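
For reference, the expansion BUILDIO(b, b, u8) now produces for the _p
variants looks like this (sketch; slow_down_io() is unchanged and the plain
inb()/outb() primitives come from the new <asm/shared/io.h>), with u16 ports
and fixed-width value types in place of the old int/unsigned char:

static inline void outb_p(u8 value, u16 port)
{
	outb(value, port);
	slow_down_io();
}

static inline u8 inb_p(u16 port)
{
	u8 value = inb(port);

	slow_down_io();
	return value;
}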
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index bf1ed2ddc74b..2fd52b65deac 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -8,6 +8,14 @@
extern int force_iommu, no_iommu;
extern int iommu_detected;
+extern int iommu_merge;
+extern int panic_on_overflow;
+
+#ifdef CONFIG_SWIOTLB
+extern bool x86_swiotlb_enable;
+#else
+#define x86_swiotlb_enable false
+#endif
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
@@ -17,8 +25,10 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
{
u64 start = rmrr->base_address;
u64 end = rmrr->end_address + 1;
+ int entry_type;
- if (e820__mapped_all(start, end, E820_TYPE_RESERVED))
+ entry_type = e820__get_entry_type(start, end);
+ if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS)
return 0;
pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n",
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
deleted file mode 100644
index 1fb3fd1a83c2..000000000000
--- a/arch/x86/include/asm/iommu_table.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_IOMMU_TABLE_H
-#define _ASM_X86_IOMMU_TABLE_H
-
-#include <asm/swiotlb.h>
-
-/*
- * History lesson:
- * The execution chain of IOMMUs in 2.6.36 looks as so:
- *
- * [xen-swiotlb]
- * |
- * +----[swiotlb *]--+
- * / | \
- * / | \
- * [GART] [Calgary] [Intel VT-d]
- * /
- * /
- * [AMD-Vi]
- *
- * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip
- * over the rest of IOMMUs and unconditionally initialize the SWIOTLB.
- * Also it would surreptitiously initialize set the swiotlb=1 if there were
- * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb
- * flag would be turned off by all IOMMUs except the Calgary one.
- *
- * The IOMMU_INIT* macros allow a similar tree (or more complex if desired)
- * to be built by defining who we depend on.
- *
- * And all that needs to be done is to use one of the macros in the IOMMU
- * and the pci-dma.c will take care of the rest.
- */
-
-struct iommu_table_entry {
- initcall_t detect;
- initcall_t depend;
- void (*early_init)(void); /* No memory allocate available. */
- void (*late_init)(void); /* Yes, can allocate memory. */
-#define IOMMU_FINISH_IF_DETECTED (1<<0)
-#define IOMMU_DETECTED (1<<1)
- int flags;
-};
-/*
- * Macro fills out an entry in the .iommu_table that is equivalent
- * to the fields that 'struct iommu_table_entry' has. The entries
- * that are put in the .iommu_table section are not put in any order
- * hence during boot-time we will have to resort them based on
- * dependency. */
-
-
-#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\
- static const struct iommu_table_entry \
- __iommu_entry_##_detect __used \
- __attribute__ ((unused, __section__(".iommu_table"), \
- aligned((sizeof(void *))))) \
- = {_detect, _depend, _early_init, _late_init, \
- _finish ? IOMMU_FINISH_IF_DETECTED : 0}
-/*
- * The simplest IOMMU definition. Provide the detection routine
- * and it will be run after the SWIOTLB and the other IOMMUs
- * that utilize this macro. If the IOMMU is detected (ie, the
- * detect routine returns a positive value), the other IOMMUs
- * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer
- * to stop detecting the other IOMMUs after yours has been detected.
- */
-#define IOMMU_INIT_POST(_detect) \
- __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0)
-
-#define IOMMU_INIT_POST_FINISH(detect) \
- __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1)
-
-/*
- * A more sophisticated version of IOMMU_INIT. This variant requires:
- * a). A detection routine function.
- * b). The name of the detection routine we depend on to get called
- * before us.
- * c). The init routine which gets called if the detection routine
- * returns a positive value from the pci_iommu_alloc. This means
- * no presence of a memory allocator.
- * d). Similar to the 'init', except that this gets called from pci_iommu_init
- * where we do have a memory allocator.
- *
- * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant
- * in that the former will continue detecting other IOMMUs in the call
- * list after the detection routine returns a positive number, while the
- * latter will stop the execution chain upon first successful detection.
- * Both variants will still call the 'init' and 'late_init' functions if
- * they are set.
- */
-#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \
- __IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
-
-#define IOMMU_INIT(_detect, _depend, _init, _late_init) \
- __IOMMU_INIT(_detect, _depend, _init, _late_init, 0)
-
-void sort_iommu_table(struct iommu_table_entry *start,
- struct iommu_table_entry *finish);
-
-void check_iommu_entries(struct iommu_table_entry *start,
- struct iommu_table_entry *finish);
-
-#endif /* _ASM_X86_IOMMU_TABLE_H */
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index ae9d40f6c706..147cb8fdda92 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -3,6 +3,7 @@
#define _ASM_X86_IRQ_STACK_H
#include <linux/ptrace.h>
+#include <linux/objtool.h>
#include <asm/processor.h>
@@ -99,7 +100,8 @@
}
#define ASM_CALL_ARG0 \
- "call %P[__func] \n"
+ "call %P[__func] \n" \
+ ASM_REACHABLE
#define ASM_CALL_ARG1 \
"movq %[arg1], %%rdi \n" \
@@ -201,7 +203,7 @@
IRQ_CONSTRAINTS, regs, vector); \
}
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
/*
* Macro to invoke __do_softirq on the irq stack. This is only called from
* task context when bottom halves are about to be reenabled and soft
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c5ce9845c999..7793e52d6237 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -114,8 +114,6 @@ static __always_inline unsigned long arch_local_irq_save(void)
#define SAVE_FLAGS pushfq; popq %rax
#endif
-#define INTERRUPT_RETURN jmp native_iret
-
#endif
#endif /* __ASSEMBLY__ */
@@ -139,14 +137,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags)
if (!arch_irqs_disabled_flags(flags))
arch_local_irq_enable();
}
-#else
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_XEN_PV
-#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
-#else
-#define SWAPGS swapgs
-#endif
-#endif
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 0449b125d27f..071572e23d3a 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -20,7 +20,7 @@
_ASM_PTR "%c0 + %c1 - .\n\t" \
".popsection \n\t"
-#ifdef CONFIG_STACK_VALIDATION
+#ifdef CONFIG_HAVE_JUMP_LABEL_HACK
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
@@ -34,7 +34,7 @@ l_yes:
return true;
}
-#else
+#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
@@ -48,7 +48,7 @@ l_yes:
return true;
}
-#endif /* STACK_VALIDATION */
+#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 11b7c06e2828..a3760ca796aa 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -186,6 +186,26 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+void arch_kexec_protect_crashkres(void);
+#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres
+
+void arch_kexec_unprotect_crashkres(void);
+#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
+
+#ifdef CONFIG_KEXEC_FILE
+struct purgatory_info;
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab);
+#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
+
+void *arch_kexec_kernel_image_load(struct kimage *image);
+#define arch_kexec_kernel_image_load arch_kexec_kernel_image_load
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+#endif
#endif
typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h
new file mode 100644
index 000000000000..8fa6ac0e2d76
--- /dev/null
+++ b/arch/x86/include/asm/kmsan.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * x86 KMSAN support.
+ *
+ * Copyright (C) 2022, Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ */
+
+#ifndef _ASM_X86_KMSAN_H
+#define _ASM_X86_KMSAN_H
+
+#ifndef MODULE
+
+#include <asm/cpu_entry_area.h>
+#include <asm/processor.h>
+#include <linux/mmzone.h>
+
+DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
+DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
+
+/*
+ * Functions below are declared in the header to make sure they are inlined.
+ * They all are called from kmsan_get_metadata() for every memory access in
+ * the kernel, so speed is important here.
+ */
+
+/*
+ * Compute metadata addresses for the CPU entry area on x86.
+ */
+static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
+{
+ unsigned long addr64 = (unsigned long)addr;
+ char *metadata_array;
+ unsigned long off;
+ int cpu;
+
+ if ((addr64 < CPU_ENTRY_AREA_BASE) ||
+ (addr64 >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE)))
+ return NULL;
+ cpu = (addr64 - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE;
+ off = addr64 - (unsigned long)get_cpu_entry_area(cpu);
+ if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE))
+ return NULL;
+ metadata_array = is_origin ? cpu_entry_area_origin :
+ cpu_entry_area_shadow;
+ return &per_cpu(metadata_array[off], cpu);
+}
+
+/*
+ * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version.
+ */
+static inline bool kmsan_phys_addr_valid(unsigned long addr)
+{
+ if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+ return !(addr >> boot_cpu_data.x86_phys_bits);
+ else
+ return true;
+}
+
+/*
+ * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version.
+ */
+static inline bool kmsan_virt_addr_valid(void *addr)
+{
+ unsigned long x = (unsigned long)addr;
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* use the carry flag to determine if x was < __START_KERNEL_map */
+ if (unlikely(x > y)) {
+ x = y + phys_base;
+
+ if (y >= KERNEL_IMAGE_SIZE)
+ return false;
+ } else {
+ x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+ /* carry flag will be set if starting x was >= PAGE_OFFSET */
+ if ((x > y) || !kmsan_phys_addr_valid(x))
+ return false;
+ }
+
+ return pfn_valid(x >> PAGE_SHIFT);
+}
+
+#endif /* !MODULE */
+
+#endif /* _ASM_X86_KMSAN_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 71ea2eab43d5..a2e9317aad49 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -50,8 +50,6 @@ extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p);
-extern void arch_kprobe_override_function(struct pt_regs *regs);
-
/* Architecture specific copy of original instruction*/
struct arch_specific_insn {
/* copy of the original instruction */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index cefe1d81e2e8..82ba4a564e58 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -1,29 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_NULL)
+#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_OPTIONAL)
BUILD_BUG_ON(1)
#endif
/*
- * KVM_X86_OP() and KVM_X86_OP_NULL() are used to help generate
- * "static_call()"s. They are also intended for use when defining
- * the vmx/svm kvm_x86_ops. KVM_X86_OP() can be used for those
- * functions that follow the [svm|vmx]_func_name convention.
- * KVM_X86_OP_NULL() can leave a NULL definition for the
- * case where there is no definition or a function name that
- * doesn't match the typical naming convention is supplied.
+ * KVM_X86_OP() and KVM_X86_OP_OPTIONAL() are used to help generate
+ * both DECLARE/DEFINE_STATIC_CALL() invocations and
+ * "static_call_update()" calls.
+ *
+ * KVM_X86_OP_OPTIONAL() can be used for those functions that can have
+ * a NULL definition, for example if "static_call_cond()" will be used
+ * at the call sites. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
+ * to make a definition optional, but in this case the default will
+ * be __static_call_return0.
*/
-KVM_X86_OP_NULL(hardware_enable)
-KVM_X86_OP_NULL(hardware_disable)
-KVM_X86_OP_NULL(hardware_unsetup)
-KVM_X86_OP_NULL(cpu_has_accelerated_tpr)
+KVM_X86_OP(hardware_enable)
+KVM_X86_OP(hardware_disable)
+KVM_X86_OP(hardware_unsetup)
KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
KVM_X86_OP(vm_init)
-KVM_X86_OP_NULL(vm_destroy)
+KVM_X86_OP_OPTIONAL(vm_destroy)
+KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
KVM_X86_OP(vcpu_create)
KVM_X86_OP(vcpu_free)
KVM_X86_OP(vcpu_reset)
-KVM_X86_OP(prepare_guest_switch)
+KVM_X86_OP(prepare_switch_to_guest)
KVM_X86_OP(vcpu_load)
KVM_X86_OP(vcpu_put)
KVM_X86_OP(update_exception_bitmap)
@@ -33,8 +35,9 @@ KVM_X86_OP(get_segment_base)
KVM_X86_OP(get_segment)
KVM_X86_OP(get_cpl)
KVM_X86_OP(set_segment)
-KVM_X86_OP_NULL(get_cs_db_l_bits)
+KVM_X86_OP(get_cs_db_l_bits)
KVM_X86_OP(set_cr0)
+KVM_X86_OP_OPTIONAL(post_set_cr3)
KVM_X86_OP(is_valid_cr4)
KVM_X86_OP(set_cr4)
KVM_X86_OP(set_efer)
@@ -47,22 +50,24 @@ KVM_X86_OP(set_dr7)
KVM_X86_OP(cache_reg)
KVM_X86_OP(get_rflags)
KVM_X86_OP(set_rflags)
-KVM_X86_OP(tlb_flush_all)
-KVM_X86_OP(tlb_flush_current)
-KVM_X86_OP_NULL(tlb_remote_flush)
-KVM_X86_OP_NULL(tlb_remote_flush_with_range)
-KVM_X86_OP(tlb_flush_gva)
-KVM_X86_OP(tlb_flush_guest)
-KVM_X86_OP(run)
-KVM_X86_OP_NULL(handle_exit)
-KVM_X86_OP_NULL(skip_emulated_instruction)
-KVM_X86_OP_NULL(update_emulated_instruction)
+KVM_X86_OP(get_if_flag)
+KVM_X86_OP(flush_tlb_all)
+KVM_X86_OP(flush_tlb_current)
+KVM_X86_OP_OPTIONAL(tlb_remote_flush)
+KVM_X86_OP_OPTIONAL(tlb_remote_flush_with_range)
+KVM_X86_OP(flush_tlb_gva)
+KVM_X86_OP(flush_tlb_guest)
+KVM_X86_OP(vcpu_pre_run)
+KVM_X86_OP(vcpu_run)
+KVM_X86_OP(handle_exit)
+KVM_X86_OP(skip_emulated_instruction)
+KVM_X86_OP_OPTIONAL(update_emulated_instruction)
KVM_X86_OP(set_interrupt_shadow)
KVM_X86_OP(get_interrupt_shadow)
KVM_X86_OP(patch_hypercall)
-KVM_X86_OP(set_irq)
-KVM_X86_OP(set_nmi)
-KVM_X86_OP(queue_exception)
+KVM_X86_OP(inject_irq)
+KVM_X86_OP(inject_nmi)
+KVM_X86_OP(inject_exception)
KVM_X86_OP(cancel_injection)
KVM_X86_OP(interrupt_allowed)
KVM_X86_OP(nmi_allowed)
@@ -70,22 +75,22 @@ KVM_X86_OP(get_nmi_mask)
KVM_X86_OP(set_nmi_mask)
KVM_X86_OP(enable_nmi_window)
KVM_X86_OP(enable_irq_window)
-KVM_X86_OP(update_cr8_intercept)
+KVM_X86_OP_OPTIONAL(update_cr8_intercept)
KVM_X86_OP(check_apicv_inhibit_reasons)
KVM_X86_OP(refresh_apicv_exec_ctrl)
-KVM_X86_OP(hwapic_irr_update)
-KVM_X86_OP(hwapic_isr_update)
-KVM_X86_OP_NULL(guest_apic_has_interrupt)
-KVM_X86_OP(load_eoi_exitmap)
-KVM_X86_OP(set_virtual_apic_mode)
-KVM_X86_OP_NULL(set_apic_access_page_addr)
-KVM_X86_OP(deliver_posted_interrupt)
-KVM_X86_OP_NULL(sync_pir_to_irr)
-KVM_X86_OP(set_tss_addr)
-KVM_X86_OP(set_identity_map_addr)
-KVM_X86_OP(get_mt_mask)
+KVM_X86_OP_OPTIONAL(hwapic_irr_update)
+KVM_X86_OP_OPTIONAL(hwapic_isr_update)
+KVM_X86_OP_OPTIONAL_RET0(guest_apic_has_interrupt)
+KVM_X86_OP_OPTIONAL(load_eoi_exitmap)
+KVM_X86_OP_OPTIONAL(set_virtual_apic_mode)
+KVM_X86_OP_OPTIONAL(set_apic_access_page_addr)
+KVM_X86_OP(deliver_interrupt)
+KVM_X86_OP_OPTIONAL(sync_pir_to_irr)
+KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
+KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
+KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
KVM_X86_OP(load_mmu_pgd)
-KVM_X86_OP_NULL(has_wbinvd_exit)
+KVM_X86_OP(has_wbinvd_exit)
KVM_X86_OP(get_l2_tsc_offset)
KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
@@ -93,34 +98,38 @@ KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP_NULL(request_immediate_exit)
+KVM_X86_OP(request_immediate_exit)
KVM_X86_OP(sched_in)
-KVM_X86_OP_NULL(update_cpu_dirty_logging)
-KVM_X86_OP_NULL(pre_block)
-KVM_X86_OP_NULL(post_block)
-KVM_X86_OP_NULL(vcpu_blocking)
-KVM_X86_OP_NULL(vcpu_unblocking)
-KVM_X86_OP_NULL(update_pi_irte)
-KVM_X86_OP_NULL(start_assignment)
-KVM_X86_OP_NULL(apicv_post_state_restore)
-KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt)
-KVM_X86_OP_NULL(set_hv_timer)
-KVM_X86_OP_NULL(cancel_hv_timer)
+KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
+KVM_X86_OP_OPTIONAL(vcpu_blocking)
+KVM_X86_OP_OPTIONAL(vcpu_unblocking)
+KVM_X86_OP_OPTIONAL(pi_update_irte)
+KVM_X86_OP_OPTIONAL(pi_start_assignment)
+KVM_X86_OP_OPTIONAL(apicv_post_state_restore)
+KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
+KVM_X86_OP_OPTIONAL(set_hv_timer)
+KVM_X86_OP_OPTIONAL(cancel_hv_timer)
KVM_X86_OP(setup_mce)
KVM_X86_OP(smi_allowed)
KVM_X86_OP(enter_smm)
KVM_X86_OP(leave_smm)
KVM_X86_OP(enable_smi_window)
-KVM_X86_OP_NULL(mem_enc_op)
-KVM_X86_OP_NULL(mem_enc_reg_region)
-KVM_X86_OP_NULL(mem_enc_unreg_region)
+KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
+KVM_X86_OP_OPTIONAL(mem_enc_register_region)
+KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
+KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
+KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
+KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
KVM_X86_OP(get_msr_feature)
KVM_X86_OP(can_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
-KVM_X86_OP_NULL(enable_direct_tlbflush)
-KVM_X86_OP_NULL(migrate_timers)
+KVM_X86_OP_OPTIONAL(enable_direct_tlbflush)
+KVM_X86_OP_OPTIONAL(migrate_timers)
KVM_X86_OP(msr_filter_changed)
-KVM_X86_OP_NULL(complete_emulated_msr)
+KVM_X86_OP(complete_emulated_msr)
+KVM_X86_OP(vcpu_deliver_sipi_vector)
+KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
#undef KVM_X86_OP
-#undef KVM_X86_OP_NULL
+#undef KVM_X86_OP_OPTIONAL
+#undef KVM_X86_OP_OPTIONAL_RET0
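
For context on how this x-macro header is consumed (a sketch mirroring the
arch/x86/kvm usage; exact call sites may differ): each includer redefines the
KVM_X86_OP* macros, pulls the header back in, and lets the list stamp out one
static call per op, which KVM later points at the vendor's kvm_x86_ops entry
via static_call_update():

#define KVM_X86_OP(func) \
	DEFINE_STATIC_CALL_NULL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
#define KVM_X86_OP_OPTIONAL KVM_X86_OP
#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
#include <asm/kvm-x86-ops.h>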
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
new file mode 100644
index 000000000000..c17e3e96fc1d
--- /dev/null
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL)
+BUILD_BUG_ON(1)
+#endif
+
+/*
+ * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate
+ * both DECLARE/DEFINE_STATIC_CALL() invocations and
+ * "static_call_update()" calls.
+ *
+ * KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have
+ * a NULL definition, for example if "static_call_cond()" will be used
+ * at the call sites.
+ */
+KVM_X86_PMU_OP(hw_event_available)
+KVM_X86_PMU_OP(pmc_is_enabled)
+KVM_X86_PMU_OP(pmc_idx_to_pmc)
+KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
+KVM_X86_PMU_OP(msr_idx_to_pmc)
+KVM_X86_PMU_OP(is_valid_rdpmc_ecx)
+KVM_X86_PMU_OP(is_valid_msr)
+KVM_X86_PMU_OP(get_msr)
+KVM_X86_PMU_OP(set_msr)
+KVM_X86_PMU_OP(refresh)
+KVM_X86_PMU_OP(init)
+KVM_X86_PMU_OP(reset)
+KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
+KVM_X86_PMU_OP_OPTIONAL(cleanup)
+
+#undef KVM_X86_PMU_OP
+#undef KVM_X86_PMU_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2164b9f4c7b0..f05ebaa26f0f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -15,6 +15,7 @@
#include <linux/cpumask.h>
#include <linux/irq_work.h>
#include <linux/irq.h>
+#include <linux/workqueue.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -52,7 +53,7 @@
#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
/* memory slots that are not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS 3
+#define KVM_INTERNAL_MEM_SLOTS 3
#define KVM_HALT_POLL_NS_DEFAULT 200000
@@ -64,6 +65,9 @@
#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \
KVM_BUS_LOCK_DETECTION_EXIT)
+#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \
+ KVM_X86_NOTIFY_VMEXIT_USER)
+
/* x86-specific vcpu->requests bit members */
#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
@@ -102,6 +106,8 @@
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
+ KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -123,7 +129,6 @@
#define INVALID_PAGE (~(hpa_t)0)
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
-#define UNMAPPED_GVA (~(gpa_t)0)
#define INVALID_GPA (~(gpa_t)0)
/* KVM Hugepage definitions for x86 */
@@ -135,7 +140,7 @@
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-#define KVM_PERMILLE_MMU_PAGES 20
+#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50
#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
@@ -246,6 +251,7 @@ enum x86_intercept_stage;
#define PFERR_SGX_BIT 15
#define PFERR_GUEST_FINAL_BIT 32
#define PFERR_GUEST_PAGE_BIT 33
+#define PFERR_IMPLICIT_ACCESS_BIT 48
#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
@@ -256,6 +262,7 @@ enum x86_intercept_stage;
#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT)
#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS (1ULL << PFERR_IMPLICIT_ACCESS_BIT)
#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
PFERR_WRITE_MASK | \
@@ -276,11 +283,11 @@ struct kvm_kernel_irq_routing_entry;
/*
* kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
* also includes TDP pages) to determine whether or not a page can be used in
- * the given MMU context. This is a subset of the overall kvm_mmu_role to
+ * the given MMU context. This is a subset of the overall kvm_cpu_role to
* minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
* 2 bytes per gfn instead of 4 bytes per gfn.
*
- * Indirect upper-level shadow pages are tracked for write-protection via
+ * Upper-level shadow pages having gptes are tracked for write-protection via
* gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create
* more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
 * gfn_track will overflow and explosions will ensue.
@@ -291,25 +298,31 @@ struct kvm_kernel_irq_routing_entry;
* the number of unique SPs that can theoretically be created is 2^n, where n
* is the number of bits that are used to compute the role.
*
- * But, even though there are 18 bits in the mask below, not all combinations
- * of modes and flags are possible. The maximum number of possible upper-level
- * shadow pages for a single gfn is in the neighborhood of 2^13.
+ * But, even though there are 19 bits in the mask below, not all combinations
+ * of modes and flags are possible:
+ *
+ * - invalid shadow pages are not accounted, so the bits are effectively 18
+ *
+ * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
+ * execonly and ad_disabled are only used for nested EPT which has
+ * has_4_byte_gpte=0. Therefore, 2 bits are always unused.
+ *
+ * - the 4 bits of level are effectively limited to the values 2/3/4/5,
+ * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE
+ * paging has exactly one upper level, making level completely redundant
+ * when has_4_byte_gpte=1.
*
- * - invalid shadow pages are not accounted.
- * - level is effectively limited to four combinations, not 16 as the number
- * bits would imply, as 4k SPs are not tracked (allowed to go unsync).
- * - level is effectively unused for non-PAE paging because there is exactly
- * one upper level (see 4k SP exception above).
- * - quadrant is used only for non-PAE paging and is exclusive with
- * gpte_is_8_bytes.
- * - execonly and ad_disabled are used only for nested EPT, which makes it
- * exclusive with quadrant.
+ * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if
+ * cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
+ *
+ * Therefore, the maximum number of possible upper-level shadow pages for a
+ * single gfn is a bit less than 2^13.
*/
union kvm_mmu_page_role {
u32 word;
struct {
unsigned level:4;
- unsigned gpte_is_8_bytes:1;
+ unsigned has_4_byte_gpte:1;
unsigned quadrant:2;
unsigned direct:1;
unsigned access:3;
@@ -320,7 +333,8 @@ union kvm_mmu_page_role {
unsigned smap_andnot_wp:1;
unsigned ad_disabled:1;
unsigned guest_mode:1;
- unsigned :6;
+ unsigned passthrough:1;
+ unsigned :5;
/*
* This is left at the top of the word so that
@@ -356,8 +370,6 @@ union kvm_mmu_extended_role {
struct {
unsigned int valid:1;
unsigned int execonly:1;
- unsigned int cr0_pg:1;
- unsigned int cr4_pae:1;
unsigned int cr4_pse:1;
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
@@ -367,7 +379,7 @@ union kvm_mmu_extended_role {
};
};
-union kvm_mmu_role {
+union kvm_cpu_role {
u64 as_u64;
struct {
union kvm_mmu_page_role base;
@@ -420,28 +432,15 @@ struct kvm_mmu {
int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
- u32 access, struct x86_exception *exception);
- gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception);
+ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gpa_t gva_or_gpa, u64 access,
+ struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
- hpa_t root_hpa;
- gpa_t root_pgd;
- union kvm_mmu_role mmu_role;
- u8 root_level;
- u8 shadow_root_level;
- u8 ept_ad;
- bool direct_map;
- struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
-
- /*
- * Bitmap; bit set = permission fault
- * Byte index: page fault error code [4:1]
- * Bit index: pte permissions in ACC_* format
- */
- u8 permissions[16];
+ struct kvm_mmu_root_info root;
+ union kvm_cpu_role cpu_role;
+ union kvm_mmu_page_role root_role;
/*
* The pkru_mask indicates if protection key checks are needed. It
@@ -451,6 +450,15 @@ struct kvm_mmu {
*/
u32 pkru_mask;
+ struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
+
+ /*
+ * Bitmap; bit set = permission fault
+ * Byte index: page fault error code [4:1]
+ * Bit index: pte permissions in ACC_* format
+ */
+ u8 permissions[16];
+
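A hedged sketch of how the permissions bitmap above is meant to be indexed; the in-tree helper in mmu.h also folds in SMAP/PKU adjustments, so treat this as illustrative only.

static inline bool example_permission_fault(struct kvm_mmu *mmu,
					    u32 pfec, u32 pte_access)
{
	/* Byte index: page-fault error code bits [4:1]. */
	u8 byte = mmu->permissions[(pfec >> 1) & 0xf];

	/* Bit index: the pte's ACC_* access mask; set bit => fault. */
	return (byte >> pte_access) & 1;
}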
u64 *pae_root;
u64 *pml4_root;
u64 *pml5_root;
@@ -490,27 +498,51 @@ struct kvm_pmc {
*/
u64 current_config;
bool is_paused;
+ bool intr;
};
+/* More counters may conflict with other existing Architectural MSRs */
+#define KVM_INTEL_PMC_MAX_GENERIC 8
+#define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
+#define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
+#define KVM_PMC_MAX_FIXED 3
+#define KVM_AMD_PMC_MAX_GENERIC 6
struct kvm_pmu {
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
unsigned available_event_types;
u64 fixed_ctr_ctrl;
+ u64 fixed_ctr_ctrl_mask;
u64 global_ctrl;
u64 global_status;
u64 counter_bitmask[2];
u64 global_ctrl_mask;
u64 global_ovf_ctrl_mask;
u64 reserved_bits;
+ u64 raw_event_mask;
u8 version;
- struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
- struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
+ struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
+ struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;
DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+ u64 ds_area;
+ u64 pebs_enable;
+ u64 pebs_enable_mask;
+ u64 pebs_data_cfg;
+ u64 pebs_data_cfg_mask;
+
+ /*
+ * If a guest counter is cross-mapped to host counter with different
+ * index, its PEBS capability will be temporarily disabled.
+ *
+ * The user should make sure that this mask is updated
+ * after disabling interrupts and before perf_guest_get_msrs();
+ */
+ u64 host_cross_mapped_mask;
+
/*
* The gate to release perf_events not marked in
* pmc_in_use only once in a vcpu time slice.
@@ -588,6 +620,8 @@ struct kvm_vcpu_hv {
u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+ u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
+ u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
} cpuid_cache;
};
@@ -595,15 +629,31 @@ struct kvm_vcpu_hv {
struct kvm_vcpu_xen {
u64 hypercall_rip;
u32 current_runstate;
- bool vcpu_info_set;
- bool vcpu_time_info_set;
- bool runstate_set;
- struct gfn_to_hva_cache vcpu_info_cache;
- struct gfn_to_hva_cache vcpu_time_info_cache;
- struct gfn_to_hva_cache runstate_cache;
+ u8 upcall_vector;
+ struct gfn_to_pfn_cache vcpu_info_cache;
+ struct gfn_to_pfn_cache vcpu_time_info_cache;
+ struct gfn_to_pfn_cache runstate_cache;
u64 last_steal;
u64 runstate_entry_time;
u64 runstate_times[4];
+ unsigned long evtchn_pending_sel;
+ u32 vcpu_id; /* The Xen / ACPI vCPU ID */
+ u32 timer_virq;
+ u64 timer_expires; /* In guest epoch */
+ atomic_t timer_pending;
+ struct hrtimer timer;
+ int poll_evtchn;
+ struct timer_list poll_timer;
+};
+
+struct kvm_queued_exception {
+ bool pending;
+ bool injected;
+ bool has_error_code;
+ u8 vector;
+ u32 error_code;
+ unsigned long payload;
+ bool has_payload;
};
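A minimal sketch of how the structure above might be filled when queuing a fault-class exception; the helper name and the choice of #GP are illustrative, not the in-tree API.

static void example_queue_gp(struct kvm_queued_exception *ex, u32 error_code)
{
	ex->pending        = true;	/* queued, not yet injected */
	ex->injected       = false;
	ex->vector         = GP_VECTOR;
	ex->has_error_code = true;
	ex->error_code     = error_code;
	ex->has_payload    = false;
}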
struct kvm_vcpu_arch {
@@ -629,7 +679,6 @@ struct kvm_vcpu_arch {
u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
- bool apicv_active;
bool load_eoi_exitmap_pending;
DECLARE_BITMAP(ioapic_handled_vectors, 256);
unsigned long apic_attention;
@@ -638,8 +687,10 @@ struct kvm_vcpu_arch {
u64 ia32_misc_enable_msr;
u64 smbase;
u64 smi_count;
+ bool at_instruction_boundary;
bool tpr_access_reporting;
bool xsaves_enabled;
+ bool xfd_no_write_intercept;
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
@@ -678,7 +729,7 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
struct kvm_mmu_memory_cache mmu_shadow_page_cache;
- struct kvm_mmu_memory_cache mmu_gfn_array_cache;
+ struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
/*
@@ -704,16 +755,12 @@ struct kvm_vcpu_arch {
u8 event_exit_inst_len;
- struct kvm_queued_exception {
- bool pending;
- bool injected;
- bool has_error_code;
- u8 nr;
- u32 error_code;
- unsigned long payload;
- bool has_payload;
- u8 nested_apf;
- } exception;
+ bool exception_from_userspace;
+
+ /* Exceptions to be injected to the guest. */
+ struct kvm_queued_exception exception;
+ /* Exception VM-Exits to be synthesized to L1. */
+ struct kvm_queued_exception exception_vmexit;
struct kvm_queued_interrupt {
bool injected;
@@ -740,8 +787,7 @@ struct kvm_vcpu_arch {
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
- struct gfn_to_hva_cache pv_time;
- bool pv_time_enabled;
+ struct gfn_to_pfn_cache pv_time;
/* set guest stopped flag in pvclock flags field */
bool pvclock_set_guest_stopped_request;
@@ -774,6 +820,7 @@ struct kvm_vcpu_arch {
unsigned nmi_pending; /* NMI queued after currently running handler */
bool nmi_injected; /* Trying to inject an NMI this entry */
bool smi_pending; /* SMI queued after currently running handler */
+ u8 handling_intr_from_guest;
struct kvm_mtrr mtrr_state;
u64 pat;
@@ -792,6 +839,7 @@ struct kvm_vcpu_arch {
u64 mcg_ctl;
u64 mcg_ext_ctl;
u64 *mce_banks;
+ u64 *mci_ctl2_banks;
/* Cache MMIO info */
u64 mmio_gva;
@@ -823,7 +871,6 @@ struct kvm_vcpu_arch {
u32 id;
bool send_user_only;
u32 host_apf_flags;
- unsigned long nested_apf_token;
bool delivery_as_pf_vmexit;
bool pageready_pending;
} apf;
@@ -960,12 +1007,10 @@ enum hv_tsc_page_status {
HV_TSC_PAGE_UNSET = 0,
/* TSC page MSR was written by the guest, update pending */
HV_TSC_PAGE_GUEST_CHANGED,
- /* TSC page MSR was written by KVM userspace, update pending */
+ /* TSC page update was triggered from the host side */
HV_TSC_PAGE_HOST_CHANGED,
/* TSC page was properly set up and is currently active */
HV_TSC_PAGE_SET,
- /* TSC page is currently being updated and therefore is inactive */
- HV_TSC_PAGE_UPDATING,
/* TSC page was set up with an inaccessible GPA */
HV_TSC_PAGE_BROKEN,
};
@@ -1012,9 +1057,12 @@ struct msr_bitmap_range {
/* Xen emulation context */
struct kvm_xen {
+ u32 xen_version;
bool long_mode;
u8 upcall_vector;
- gfn_t shinfo_gfn;
+ struct gfn_to_pfn_cache shinfo_cache;
+ struct idr evtchn_ports;
+ unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
};
enum kvm_irqchip_mode {
@@ -1029,14 +1077,75 @@ struct kvm_x86_msr_filter {
struct msr_bitmap_range ranges[16];
};
-#define APICV_INHIBIT_REASON_DISABLE 0
-#define APICV_INHIBIT_REASON_HYPERV 1
-#define APICV_INHIBIT_REASON_NESTED 2
-#define APICV_INHIBIT_REASON_IRQWIN 3
-#define APICV_INHIBIT_REASON_PIT_REINJ 4
-#define APICV_INHIBIT_REASON_X2APIC 5
-#define APICV_INHIBIT_REASON_BLOCKIRQ 6
-#define APICV_INHIBIT_REASON_ABSENT 7
+enum kvm_apicv_inhibit {
+
+ /********************************************************************/
+ /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
+ /********************************************************************/
+
+ /*
+ * APIC acceleration is disabled by a module parameter
+ * and/or not supported in hardware.
+ */
+ APICV_INHIBIT_REASON_DISABLE,
+
+ /*
+ * APIC acceleration is inhibited because AutoEOI feature is
+ * being used by a HyperV guest.
+ */
+ APICV_INHIBIT_REASON_HYPERV,
+
+ /*
+	 * APIC acceleration is inhibited because userspace has not yet
+	 * enabled the kernel or split irqchip.
+ */
+ APICV_INHIBIT_REASON_ABSENT,
+
+	/*
+	 * APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
+	 * (an out-of-band debug measure that blocks all interrupts on this
+	 * vCPU) was enabled, to avoid AVIC/APICv bypassing it.
+	 */
+ APICV_INHIBIT_REASON_BLOCKIRQ,
+
+ /*
+	 * For simplicity, APIC acceleration is inhibited the first time
+	 * either the APIC ID or the APIC base is changed by the guest
+	 * from its reset value.
+ */
+ APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,
+
+ /******************************************************/
+ /* INHIBITs that are relevant only to the AMD's AVIC. */
+ /******************************************************/
+
+ /*
+ * AVIC is inhibited on a vCPU because it runs a nested guest.
+ *
+ * This is needed because unlike APICv, the peers of this vCPU
+ * cannot use the doorbell mechanism to signal interrupts via AVIC when
+ * a vCPU runs nested.
+ */
+ APICV_INHIBIT_REASON_NESTED,
+
+ /*
+ * On SVM, the wait for the IRQ window is implemented with pending vIRQ,
+ * which cannot be injected when the AVIC is enabled, thus AVIC
+ * is inhibited while KVM waits for IRQ window.
+ */
+ APICV_INHIBIT_REASON_IRQWIN,
+
+ /*
+ * PIT (i8254) 're-inject' mode, relies on EOI intercept,
+ * which AVIC doesn't support for edge triggered interrupts.
+ */
+ APICV_INHIBIT_REASON_PIT_REINJ,
+
+ /*
+ * AVIC is disabled because SEV doesn't support it.
+ */
+ APICV_INHIBIT_REASON_SEV,
+};
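A hedged sketch of the vendor-side counterpart: check_apicv_inhibit_reasons() typically reports which of the reasons above the implementation can honor. The function name and the exact set below are illustrative.

static bool example_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
{
	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
			  BIT(APICV_INHIBIT_REASON_ABSENT)  |
			  BIT(APICV_INHIBIT_REASON_HYPERV)  |
			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ);

	return supported & BIT(reason);
}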
struct kvm_arch {
unsigned long n_used_mmu_pages;
@@ -1104,6 +1213,8 @@ struct kvm_arch {
u64 cur_tsc_generation;
int nr_vcpus_matched_tsc;
+ u32 default_tsc_khz;
+
seqcount_raw_spinlock_t pvclock_sc;
bool use_master_clock;
u64 master_kernel_ns;
@@ -1119,10 +1230,6 @@ struct kvm_arch {
struct kvm_hv hyperv;
struct kvm_xen xen;
- #ifdef CONFIG_KVM_MMU_AUDIT
- int audit_point;
- #endif
-
bool backwards_tsc_observed;
bool boot_vcpu_runs_old_kvmclock;
u32 bsp_vcpu_id;
@@ -1141,7 +1248,13 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+ bool triple_fault_event;
+
bool bus_lock_detection_enabled;
+ bool enable_pmu;
+
+ u32 notify_window;
+ u32 notify_vmexit_flags;
/*
* If exit_on_emulation_error is set, and the in-kernel instruction
* emulator fails to emulate an instruction, allow userspace
@@ -1172,8 +1285,8 @@ struct kvm_arch {
bool tdp_mmu_enabled;
/*
- * List of struct kvm_mmu_pages being used as roots.
- * All struct kvm_mmu_pages in the list should have
+ * List of kvm_mmu_page structs being used as roots.
+ * All kvm_mmu_page structs in the list should have
* tdp_mmu_page set.
*
* For reads, this list is protected by:
@@ -1192,8 +1305,8 @@ struct kvm_arch {
struct list_head tdp_mmu_roots;
/*
- * List of struct kvmp_mmu_pages not being used as roots.
- * All struct kvm_mmu_pages in the list should have
+ * List of kvm_mmu_page structs not being used as roots.
+ * All kvm_mmu_page structs in the list should have
* tdp_mmu_page set and a tdp_mmu_root_count of 0.
*/
struct list_head tdp_mmu_pages;
@@ -1203,14 +1316,15 @@ struct kvm_arch {
* is held in read mode:
* - tdp_mmu_roots (above)
* - tdp_mmu_pages (above)
- * - the link field of struct kvm_mmu_pages used by the TDP MMU
+ * - the link field of kvm_mmu_page structs used by the TDP MMU
* - lpage_disallowed_mmu_pages
- * - the lpage_disallowed_link field of struct kvm_mmu_pages used
+ * - the lpage_disallowed_link field of kvm_mmu_page structs used
* by the TDP MMU
* It is acceptable, but not necessary, to acquire this lock when
* the thread holds the MMU lock in write mode.
*/
spinlock_t tdp_mmu_pages_lock;
+ struct workqueue_struct *tdp_mmu_zap_wq;
#endif /* CONFIG_X86_64 */
/*
@@ -1224,6 +1338,36 @@ struct kvm_arch {
hpa_t hv_root_tdp;
spinlock_t hv_root_tdp_lock;
#endif
+ /*
+ * VM-scope maximum vCPU ID. Used to determine the size of structures
+ * that increase along with the maximum vCPU ID, in which case, using
+ * the global KVM_MAX_VCPU_IDS may lead to significant memory waste.
+ */
+ u32 max_vcpu_ids;
+
+ bool disable_nx_huge_pages;
+
+ /*
+ * Memory caches used to allocate shadow pages when performing eager
+ * page splitting. No need for a shadowed_info_cache since eager page
+ * splitting only allocates direct shadow pages.
+ *
+ * Protected by kvm->slots_lock.
+ */
+ struct kvm_mmu_memory_cache split_shadow_page_cache;
+ struct kvm_mmu_memory_cache split_page_header_cache;
+
+ /*
+ * Memory cache used to allocate pte_list_desc structs while splitting
+ * huge pages. In the worst case, to split one huge page, 512
+ * pte_list_desc structs are needed to add each lower level leaf sptep
+ * to the rmap plus 1 to extend the parent_ptes rmap of the lower level
+ * page table.
+ *
+ * Protected by kvm->slots_lock.
+ */
+#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
+ struct kvm_mmu_memory_cache split_desc_cache;
};
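A hedged sketch of how the eager-splitting caches above might be topped up before mmu_lock is taken; the helper name and capacity choice are illustrative and the in-tree code differs in detail.

static int example_topup_split_caches(struct kvm *kvm)
{
	int r;

	/* The desc cache needs room for a full worst-case split (513 objects). */
	r = __kvm_mmu_topup_memory_cache(&kvm->arch.split_desc_cache,
					 2 * SPLIT_DESC_CACHE_MIN_NR_OBJECTS,
					 SPLIT_DESC_CACHE_MIN_NR_OBJECTS);
	if (r)
		return r;

	r = kvm_mmu_topup_memory_cache(&kvm->arch.split_page_header_cache, 1);
	if (r)
		return r;

	return kvm_mmu_topup_memory_cache(&kvm->arch.split_shadow_page_cache, 1);
}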
struct kvm_vm_stat {
@@ -1250,7 +1394,12 @@ struct kvm_vm_stat {
struct kvm_vcpu_stat {
struct kvm_vcpu_stat_generic generic;
+ u64 pf_taken;
u64 pf_fixed;
+ u64 pf_emulate;
+ u64 pf_spurious;
+ u64 pf_fast;
+ u64 pf_mmio_spte_created;
u64 pf_guest;
u64 tlb_flush;
u64 invlpg;
@@ -1276,7 +1425,10 @@ struct kvm_vcpu_stat {
u64 nested_run;
u64 directed_yield_attempted;
u64 directed_yield_successful;
+ u64 preemption_reported;
+ u64 preemption_other;
u64 guest_mode;
+ u64 notify_window_exits;
};
struct x86_instruction_info;
@@ -1309,7 +1461,6 @@ struct kvm_x86_ops {
int (*hardware_enable)(void);
void (*hardware_disable)(void);
void (*hardware_unsetup)(void);
- bool (*cpu_has_accelerated_tpr)(void);
bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
@@ -1318,11 +1469,12 @@ struct kvm_x86_ops {
void (*vm_destroy)(struct kvm *kvm);
/* Create, but do not attach this VCPU */
+ int (*vcpu_precreate)(struct kvm *kvm);
int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
- void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
+ void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
@@ -1337,6 +1489,7 @@ struct kvm_x86_ops {
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
+ void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
@@ -1349,9 +1502,10 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+ bool (*get_if_flag)(struct kvm_vcpu *vcpu);
- void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
- void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
+ void (*flush_tlb_all)(struct kvm_vcpu *vcpu);
+ void (*flush_tlb_current)(struct kvm_vcpu *vcpu);
int (*tlb_remote_flush)(struct kvm *kvm);
int (*tlb_remote_flush_with_range)(struct kvm *kvm,
struct kvm_tlb_range *range);
@@ -1362,15 +1516,16 @@ struct kvm_x86_ops {
* Can potentially get non-canonical addresses through INVLPGs, which
* the implementation may choose to ignore if appropriate.
*/
- void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
+ void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr);
/*
* Flush any TLB entries created by the guest. Like tlb_flush_gva(),
* does not need to flush GPA->HPA mappings.
*/
- void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
+ void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
- enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
+ int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
+ enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1379,9 +1534,9 @@ struct kvm_x86_ops {
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
- void (*set_irq)(struct kvm_vcpu *vcpu);
- void (*set_nmi)(struct kvm_vcpu *vcpu);
- void (*queue_exception)(struct kvm_vcpu *vcpu);
+ void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected);
+ void (*inject_nmi)(struct kvm_vcpu *vcpu);
+ void (*inject_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
@@ -1390,19 +1545,20 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*check_apicv_inhibit_reasons)(ulong bit);
+ bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
- void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
+ void (*hwapic_isr_update)(int isr);
bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
- int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
- u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+ u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
int root_level);
@@ -1439,28 +1595,14 @@ struct kvm_x86_ops {
int cpu_dirty_log_size;
void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
- /* pmu operations of sub-arch */
- const struct kvm_pmu_ops *pmu_ops;
const struct kvm_x86_nested_ops *nested_ops;
- /*
- * Architecture specific hooks for vCPU blocking due to
- * HLT instruction.
- * Returns for .pre_block():
- * - 0 means continue to block the vCPU.
- * - 1 means we cannot block the vCPU since some event
- * happens during this period, such as, 'ON' bit in
- * posted-interrupts descriptor is set.
- */
- int (*pre_block)(struct kvm_vcpu *vcpu);
- void (*post_block)(struct kvm_vcpu *vcpu);
-
void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
- int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
+ int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
- void (*start_assignment)(struct kvm *kvm);
+ void (*pi_start_assignment)(struct kvm *kvm);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
@@ -1475,15 +1617,17 @@ struct kvm_x86_ops {
int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
- int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
- int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
- int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
+ int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+ void (*guest_memory_reclaimed)(struct kvm *kvm);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
- bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
+ bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
@@ -1493,11 +1637,19 @@ struct kvm_x86_ops {
int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+
+ /*
+ * Returns vCPU specific APICv inhibit reasons
+ */
+ unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
};
struct kvm_x86_nested_ops {
+ void (*leave_nested)(struct kvm_vcpu *vcpu);
+ bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector,
+ u32 error_code);
int (*check_events)(struct kvm_vcpu *vcpu);
- bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ bool (*has_events)(struct kvm_vcpu *vcpu);
void (*triple_fault)(struct kvm_vcpu *vcpu);
int (*get_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
@@ -1518,8 +1670,10 @@ struct kvm_x86_init_ops {
int (*disabled_by_bios)(void);
int (*check_processor_compatibility)(void);
int (*hardware_setup)(void);
+ unsigned int (*handle_intel_pt_intr)(void);
struct kvm_x86_ops *runtime_ops;
+ struct kvm_pmu_ops *pmu_ops;
};
struct kvm_arch_async_pf {
@@ -1537,16 +1691,9 @@ extern struct kvm_x86_ops kvm_x86_ops;
#define KVM_X86_OP(func) \
DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
-#define KVM_X86_OP_NULL KVM_X86_OP
-#include <asm/kvm-x86-ops.h>
-
-static inline void kvm_ops_static_call_update(void)
-{
-#define KVM_X86_OP(func) \
- static_call_update(kvm_x86_##func, kvm_x86_ops.func);
-#define KVM_X86_OP_NULL KVM_X86_OP
+#define KVM_X86_OP_OPTIONAL KVM_X86_OP
+#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
#include <asm/kvm-x86-ops.h>
-}
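For reference, a hedged sketch of what the x-macro include above produces for one hook and how callers then reach it; vcpu_run is just an example entry from kvm-x86-ops.h, and the surrounding variables are placeholders.

/* KVM_X86_OP(vcpu_run) expands to roughly: */
DECLARE_STATIC_CALL(kvm_x86_vcpu_run, *(((struct kvm_x86_ops *)0)->vcpu_run));

/* ...and a call site then uses the static call directly: */
exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
/* KVM_X86_OP_OPTIONAL hooks are invoked via static_call_cond() instead. */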
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
@@ -1567,12 +1714,16 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
return -ENOTSUPP;
}
-int kvm_mmu_module_init(void);
-void kvm_mmu_module_exit(void);
+#define kvm_arch_pmi_in_guest(vcpu) \
+ ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
+
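A short usage sketch for the helper above, assuming a perf guest-state callback context; the variable names are illustrative.

struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
bool pmi_is_guest = kvm_arch_pmi_in_guest(vcpu);	/* false if no vCPU is loaded */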
+void __init kvm_mmu_x86_module_init(void);
+int kvm_mmu_vendor_module_init(void);
+void kvm_mmu_vendor_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_init_vm(struct kvm *kvm);
+int kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
@@ -1580,16 +1731,22 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
const struct kvm_memory_slot *memslot,
int start_level);
+void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot,
+ int target_level);
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot,
+ u64 start, u64 end,
+ int target_level);
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
-int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
@@ -1611,21 +1768,6 @@ extern bool tdp_enabled;
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
-/* control of guest tsc rate supported? */
-extern bool kvm_has_tsc_control;
-/* maximum supported tsc_khz for guests */
-extern u32 kvm_max_guest_tsc_khz;
-/* number of bits of the fractional part of the TSC scaling ratio */
-extern u8 kvm_tsc_scaling_ratio_frac_bits;
-/* maximum allowed value of TSC scaling ratio */
-extern u64 kvm_max_tsc_scaling_ratio;
-/* 1ull << kvm_tsc_scaling_ratio_frac_bits */
-extern u64 kvm_default_tsc_scaling_ratio;
-/* bus lock detection supported? */
-extern bool kvm_has_bus_lock_exit;
-
-extern u64 kvm_mce_cap_supported;
-
/*
* EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
* userspace I/O) to indicate that the emulation context
@@ -1639,7 +1781,8 @@ extern u64 kvm_mce_cap_supported;
*
* EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
* decode the instruction length. For use *only* by
- * kvm_x86_ops.skip_emulated_instruction() implementations.
+ * kvm_x86_ops.skip_emulated_instruction() implementations if
+ * EMULTYPE_COMPLETE_USER_EXIT is not set.
*
* EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
* retry native execution under certain conditions,
@@ -1659,6 +1802,10 @@ extern u64 kvm_mce_cap_supported;
*
* EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
* case the CR2/GPA value pass on the stack is valid.
+ *
+ * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
+ * state and inject single-step #DBs after skipping
+ * an instruction (after completing userspace I/O).
*/
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
@@ -1667,6 +1814,7 @@ extern u64 kvm_mce_cap_supported;
#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
#define EMULTYPE_VMWARE_GP (1 << 5)
#define EMULTYPE_PF (1 << 6)
+#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
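A hedged example of combining these flags, consistent with the comments above; the exact caller is illustrative: skipping a previously emulated instruction once a userspace exit completes.

/* Sketch: skip the instruction and update interruptibility/#DB state. */
return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP |
				     EMULTYPE_COMPLETE_USER_EXIT);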
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
@@ -1691,7 +1839,7 @@ int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu);
int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
@@ -1712,7 +1860,6 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
-void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -1728,7 +1875,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
-bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
@@ -1757,12 +1904,9 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
ulong roots_to_free);
-void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception);
+void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1773,12 +1917,24 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
bool kvm_apicv_activated(struct kvm *kvm);
+bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
-void kvm_request_apicv_update(struct kvm *kvm, bool activate,
- unsigned long bit);
+void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
+ enum kvm_apicv_inhibit reason, bool set);
+void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
+ enum kvm_apicv_inhibit reason, bool set);
-void __kvm_request_apicv_update(struct kvm *kvm, bool activate,
- unsigned long bit);
+static inline void kvm_set_apicv_inhibit(struct kvm *kvm,
+ enum kvm_apicv_inhibit reason)
+{
+ kvm_set_or_clear_apicv_inhibit(kvm, reason, true);
+}
+
+static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
+ enum kvm_apicv_inhibit reason)
+{
+ kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
+}
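A brief usage sketch of the wrappers above; the inhibit reason and call sites are chosen for illustration only.

/* While PIT re-inject mode is configured: */
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PIT_REINJ);
/* ...and once re-inject mode is turned off again: */
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PIT_REINJ);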
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
@@ -1855,7 +2011,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
-void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
@@ -1870,7 +2025,7 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr)
return kvm_find_user_return_msr(msr) >= 0;
}
-u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio);
+u64 kvm_scale_tsc(u64 tsc, u64 ratio);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
@@ -1896,8 +2051,6 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
-int kvm_is_in_guest(void);
-
void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
@@ -1926,8 +2079,6 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
-
static inline int kvm_cpu_get_apicid(int mps_cpu)
{
#ifdef CONFIG_X86_LOCAL_APIC
@@ -1951,4 +2102,13 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
#define KVM_CLOCK_VALID_FLAGS \
(KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+#define KVM_X86_VALID_QUIRKS \
+ (KVM_X86_QUIRK_LINT0_REENABLED | \
+ KVM_X86_QUIRK_CD_NW_CLEARED | \
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \
+ KVM_X86_QUIRK_OUT_7E_INC_RIP | \
+ KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
+ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
+
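A hedged sketch of how the mask above can validate a userspace request to disable quirks; the capability-handler shape is illustrative.

if (cap->args[0] & ~KVM_X86_VALID_QUIRKS)
	return -EINVAL;
kvm->arch.disabled_quirks = cap->args[0];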
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 9d4a3b1b25b9..eb186bc57f6a 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -63,9 +63,9 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
void kvm_slot_page_track_remove_page(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
+bool kvm_slot_page_track_is_active(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ gfn_t gfn, enum kvm_page_track_mode mode);
void
kvm_page_track_register_notifier(struct kvm *kvm,
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 56935ebb1dfe..57bc74e112f2 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,6 +7,8 @@
#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
+#include <asm/tdx.h>
+
#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
#else
@@ -32,6 +34,10 @@ static inline bool kvm_check_and_clear_guest_paused(void)
static inline long kvm_hypercall0(unsigned int nr)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, 0, 0, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr)
@@ -42,6 +48,10 @@ static inline long kvm_hypercall0(unsigned int nr)
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, 0, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1)
@@ -53,6 +63,10 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
unsigned long p2)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, 0, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2)
@@ -64,6 +78,10 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, p3, 0);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3)
@@ -76,6 +94,10 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
unsigned long p4)
{
long ret;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return tdx_kvm_hypercall(nr, p1, p2, p3, p4);
+
asm volatile(KVM_HYPERCALL
: "=a"(ret)
: "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 365111789cc6..f484d656d34e 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -3,6 +3,7 @@
#define _ASM_X86_LINKAGE_H
#include <linux/stringify.h>
+#include <asm/ibt.h>
#undef notrace
#define notrace __attribute__((no_instrument_function))
@@ -18,7 +19,71 @@
#define __ALIGN_STR __stringify(__ALIGN)
#endif
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define RET jmp __x86_return_thunk
+#else /* CONFIG_RETPOLINE */
+#ifdef CONFIG_SLS
+#define RET ret; int3
+#else
+#define RET ret
+#endif
+#endif /* CONFIG_RETPOLINE */
+
+#else /* __ASSEMBLY__ */
+
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define ASM_RET "jmp __x86_return_thunk\n\t"
+#else /* CONFIG_RETPOLINE */
+#ifdef CONFIG_SLS
+#define ASM_RET "ret; int3\n\t"
+#else
+#define ASM_RET "ret\n\t"
+#endif
+#endif /* CONFIG_RETPOLINE */
+
#endif /* __ASSEMBLY__ */
+#define __CFI_TYPE(name) \
+ SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \
+ .fill 11, 1, 0x90 ASM_NL \
+ .byte 0xb8 ASM_NL \
+ .long __kcfi_typeid_##name ASM_NL \
+ SYM_FUNC_END(__cfi_##name)
+
+/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ ENDBR
+
+/* SYM_FUNC_START -- use for global functions */
+#define SYM_FUNC_START(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ ENDBR
+
+/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */
+#define SYM_FUNC_START_NOALIGN(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \
+ ENDBR
+
+/* SYM_FUNC_START_LOCAL -- use for local functions */
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \
+ ENDBR
+
+/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */
+#define SYM_FUNC_START_LOCAL_NOALIGN(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \
+ ENDBR
+
+/* SYM_FUNC_START_WEAK -- use for weak functions */
+#define SYM_FUNC_START_WEAK(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \
+ ENDBR
+
+/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */
+#define SYM_FUNC_START_WEAK_NOALIGN(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
+ ENDBR
+
#endif /* _ASM_X86_LINKAGE_H */
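A hedged sketch of what the macros above imply outside this header: assembly emitted from C must return via ASM_RET (and .S code via RET inside SYM_FUNC_START/SYM_FUNC_END) so the return can be rewritten into a thunk jump or "ret; int3"; the stub name is invented.

/* Sketch: a trivial text stub emitted from C. */
asm(".pushsection .text, \"ax\"\n"
    ".globl example_stub\n"
    "example_stub:\n\t"
    ASM_RET
    ".popsection\n");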
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
deleted file mode 100644
index 7c5cc6660e4b..000000000000
--- a/arch/x86/include/asm/livepatch.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * livepatch.h - x86-specific Kernel Live Patching Core
- *
- * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
- * Copyright (C) 2014 SUSE
- */
-
-#ifndef _ASM_X86_LIVEPATCH_H
-#define _ASM_X86_LIVEPATCH_H
-
-#include <asm/setup.h>
-#include <linux/ftrace.h>
-
-static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip)
-{
- ftrace_instruction_pointer_set(fregs, ip);
-}
-
-#endif /* _ASM_X86_LIVEPATCH_H */
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 97198001e567..6115bb3d5795 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -95,7 +95,7 @@ static inline unsigned char current_lock_cmos_reg(void)
unsigned char rtc_cmos_read(unsigned char addr);
void rtc_cmos_write(unsigned char val, unsigned char addr);
-extern int mach_set_rtc_mmss(const struct timespec64 *now);
+extern int mach_set_cmos_time(const struct timespec64 *now);
extern void mach_get_cmos_time(struct timespec64 *now);
#define RTC_IRQ 8
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8f6395d9e209..6e986088817d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -42,6 +42,7 @@
#define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */
#define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38)
#define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT)
+#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff)
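A one-line usage sketch for the new helper, assuming m is a struct mce pointer.

u16 mscod = MCI_STATUS_MSCOD(m->status);	/* model-specific error code */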
/* AMD-specific bits */
#define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */
@@ -313,31 +314,22 @@ enum smca_bank_types {
SMCA_SMU, /* System Management Unit */
SMCA_SMU_V2,
SMCA_MP5, /* Microprocessor 5 Unit */
+ SMCA_MPDMA, /* MPDMA Unit */
SMCA_NBIO, /* Northbridge IO Unit */
SMCA_PCIE, /* PCI Express Unit */
SMCA_PCIE_V2,
SMCA_XGMI_PCS, /* xGMI PCS Unit */
+ SMCA_NBIF, /* NBIF Unit */
+ SMCA_SHUB, /* System HUB Unit */
+ SMCA_SATA, /* SATA Unit */
+ SMCA_USB, /* USB Unit */
+ SMCA_GMI_PCS, /* GMI PCS Unit */
SMCA_XGMI_PHY, /* xGMI PHY Unit */
SMCA_WAFL_PHY, /* WAFL PHY Unit */
+ SMCA_GMI_PHY, /* GMI PHY Unit */
N_SMCA_BANK_TYPES
};
-#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
-
-struct smca_hwid {
- unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
- u32 hwid_mcatype; /* (hwid,mcatype) tuple */
- u8 count; /* Number of instances. */
-};
-
-struct smca_bank {
- struct smca_hwid *hwid;
- u32 id; /* Value of MCA_IPID[InstanceId]. */
- u8 sysfs_id; /* Value used for sysfs name. */
-};
-
-extern struct smca_bank smca_banks[MAX_NR_BANKS];
-
extern const char *smca_get_long_name(enum smca_bank_types t);
extern bool amd_mce_is_memory_error(struct mce *m);
@@ -345,16 +337,13 @@ extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu);
void mce_amd_feature_init(struct cpuinfo_x86 *c);
-int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
-enum smca_bank_types smca_get_bank_type(unsigned int bank);
+enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
#else
static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
-static inline int
-umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
#endif
static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index e2c6f433ed10..72ca90552b6a 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -49,9 +49,6 @@ void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
void __init mem_encrypt_free_decrypted_mem(void);
-/* Architecture __weak replacement functions */
-void __init mem_encrypt_init(void);
-
void __init sev_es_init_vc_handling(void);
#define __bss_decrypted __section(".bss..decrypted")
@@ -89,6 +86,11 @@ static inline void mem_encrypt_free_decrypted_mem(void) { }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void);
+
+void add_encrypt_protection_map(void);
+
/*
* The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
* writing to or comparing values from the cr3 register. Having the
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index d6bfdfb0f0af..74ecc2bd6cd0 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -9,6 +9,7 @@
struct ucode_patch {
struct list_head plist;
void *data; /* Intel uses only this one */
+ unsigned int size;
u32 patch_id;
u16 equiv_cpu;
};
@@ -32,9 +33,6 @@ enum ucode_state {
};
struct microcode_ops {
- enum ucode_state (*request_microcode_user) (int cpu,
- const void __user *buf, size_t size);
-
enum ucode_state (*request_microcode_fw) (int cpu, struct device *,
bool refresh_fw);
@@ -131,10 +129,12 @@ extern void __init load_ucode_bsp(void);
extern void load_ucode_ap(void);
void reload_early_microcode(void);
extern bool initrd_gone;
+void microcode_bsp_resume(void);
#else
static inline void __init load_ucode_bsp(void) { }
static inline void load_ucode_ap(void) { }
static inline void reload_early_microcode(void) { }
+static inline void microcode_bsp_resume(void) { }
#endif
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 7063b5a43220..ac31f9140d07 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -38,7 +38,7 @@ struct microcode_header_amd {
struct microcode_amd {
struct microcode_header_amd hdr;
- unsigned int mpb[0];
+ unsigned int mpb[];
};
#define PATCH_MAX_SIZE (3 * PAGE_SIZE)
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index d85a07d7154f..4c92cea7e4b5 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -19,7 +19,7 @@ struct microcode_header_intel {
struct microcode_intel {
struct microcode_header_intel hdr;
- unsigned int bits[0];
+ unsigned int bits[];
};
/* microcode format is extended from prescott processors */
@@ -33,7 +33,7 @@ struct extended_sigtable {
unsigned int count;
unsigned int cksum;
unsigned int reserved[3];
- struct extended_signature sigs[0];
+ struct extended_signature sigs[];
};
#define DEFAULT_UCODE_DATASIZE (2000)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 27516046117a..b8d40ddeab00 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -141,7 +141,7 @@ do { \
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm) \
do { \
- lazy_load_gs(0); \
+ loadsegment(gs, 0); \
} while (0)
#else
#define deactivate_mm(tsk, mm) \
diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h
deleted file mode 100644
index f572d0f944bb..000000000000
--- a/arch/x86/include/asm/mmx.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MMX_H
-#define _ASM_X86_MMX_H
-
-/*
- * MMX 3Dnow! helper operations
- */
-
-#include <linux/types.h>
-
-extern void *_mmx_memcpy(void *to, const void *from, size_t size);
-extern void mmx_clear_page(void *page);
-extern void mmx_copy_page(void *to, void *from);
-
-#endif /* _ASM_X86_MMX_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index da3972fe5a7a..61f0c206bff0 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -30,7 +30,7 @@ extern void *hv_hypercall_pg;
extern u64 hv_current_partition_id;
-extern union hv_ghcb __percpu **hv_ghcb_pg;
+extern union hv_ghcb * __percpu *hv_ghcb_pg;
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
@@ -169,13 +169,6 @@ bool hv_vcpu_is_preempted(int vcpu);
static inline void hv_apic_init(void) {}
#endif
-static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry,
- struct msi_desc *msi_desc)
-{
- msi_entry->address.as_uint32 = msi_desc->msg.address_lo;
- msi_entry->data.as_uint32 = msi_desc->msg.data;
-}
-
struct irq_domain *hv_create_pci_msi_domain(void);
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
@@ -186,9 +179,13 @@ int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible);
#ifdef CONFIG_AMD_MEM_ENCRYPT
void hv_ghcb_msr_write(u64 msr, u64 value);
void hv_ghcb_msr_read(u64 msr, u64 *value);
+bool hv_ghcb_negotiate_protocol(void);
+void hv_ghcb_terminate(unsigned int set, unsigned int reason);
#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
+static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
+static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
#endif
extern bool hv_isolation_type_snp(void);
diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index b85147d75626..d71c7e8b738d 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -12,14 +12,17 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
/* Structs and defines for the X86 specific MSI message format */
typedef struct x86_msi_data {
- u32 vector : 8,
- delivery_mode : 3,
- dest_mode_logical : 1,
- reserved : 2,
- active_low : 1,
- is_level : 1;
-
- u32 dmar_subhandle;
+ union {
+ struct {
+ u32 vector : 8,
+ delivery_mode : 3,
+ dest_mode_logical : 1,
+ reserved : 2,
+ active_low : 1,
+ is_level : 1;
+ };
+ u32 dmar_subhandle;
+ };
} __attribute__ ((packed)) arch_msi_msg_data_t;
#define arch_msi_msg_data x86_msi_data
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 01e2650b9585..10ac52705892 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -51,6 +51,8 @@
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -76,6 +78,8 @@
/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
#define MSR_IA32_CORE_CAPS 0x000000cf
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT)
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
@@ -91,6 +95,7 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
@@ -114,6 +119,46 @@
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
+#define ARCH_CAP_PBRSB_NO BIT(24) /*
+ * Not susceptible to Post-Barrier
+ * Return Stack Buffer Predictions.
+ */
+
+#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
+ * IA32_XAPIC_DISABLE_STATUS MSR
+ * supported
+ */
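A hedged sketch of consuming these enumeration bits; the specific bit tested is illustrative.

u64 ia32_cap = 0;

if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
	rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

/* e.g. does VERW flush the fill buffers even on MDS_NO parts? */
bool fb_clear = ia32_cap & ARCH_CAP_FB_CLEAR;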
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -128,9 +173,10 @@
#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
-/* SRBDS support */
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
-#define RNGDS_MITG_DIS BIT(0)
+#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
+#define RTM_ALLOW BIT(1) /* TSX development mode */
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
@@ -154,6 +200,11 @@
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_POWER_CTL_BIT_EE 19
+/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
+#define MSR_INTEGRITY_CAPS 0x000002d9
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@@ -189,6 +240,12 @@
#define PERF_CAP_PT_IDX 16
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
+#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
+#define PERF_CAP_ARCH_REG BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT 0xf00
+#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
@@ -205,6 +262,8 @@
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_PTW_EN BIT(12)
#define RTIT_CTL_BRANCH_EN BIT(13)
+#define RTIT_CTL_EVENT_EN BIT(31)
+#define RTIT_CTL_NOTNT BIT_ULL(55)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
@@ -310,6 +369,7 @@
/* Run Time Average Power Limiting (RAPL) Interface */
+#define MSR_VR_CURRENT_CONFIG 0x00000601
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
@@ -343,6 +403,7 @@
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
+#define MSR_SECONDARY_TURBO_RATIO_LIMIT 0x00000650
#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
@@ -360,11 +421,29 @@
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
-
#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
+/* Control-flow Enforcement Technology MSRs */
+#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */
+#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */
+#define CET_SHSTK_EN BIT_ULL(0)
+#define CET_WRSS_EN BIT_ULL(1)
+#define CET_ENDBR_EN BIT_ULL(2)
+#define CET_LEG_IW_EN BIT_ULL(3)
+#define CET_NO_TRACK_EN BIT_ULL(4)
+#define CET_SUPPRESS_DISABLE BIT_ULL(5)
+#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9))
+#define CET_SUPPRESS BIT_ULL(10)
+#define CET_WAIT_ENDBR BIT_ULL(11)
+
+#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */
+#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */
+#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */
+#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */
+#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */
+
/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
@@ -476,19 +555,50 @@
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+/* AMD Collaborative Processor Performance Control MSRs */
+#define MSR_AMD_CPPC_CAP1 0xc00102b0
+#define MSR_AMD_CPPC_ENABLE 0xc00102b1
+#define MSR_AMD_CPPC_CAP2 0xc00102b2
+#define MSR_AMD_CPPC_REQ 0xc00102b3
+#define MSR_AMD_CPPC_STATUS 0xc00102b4
+
+#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
+#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
+#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
+#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
+
+#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
+#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
+#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
+#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+
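A hedged sketch of how these CPPC MSRs and accessors fit together, loosely following the amd-pstate driver; the chosen values are for illustration.

u64 cap1, req;

wrmsrl(MSR_AMD_CPPC_ENABLE, 1);			/* opt in to MSR-based CPPC */
rdmsrl(MSR_AMD_CPPC_CAP1, cap1);

req = AMD_CPPC_MIN_PERF(AMD_CPPC_LOWNONLIN_PERF(cap1)) |
      AMD_CPPC_MAX_PERF(AMD_CPPC_HIGHEST_PERF(cap1)) |
      AMD_CPPC_DES_PERF(0);			/* 0: let hardware choose */
wrmsrl(MSR_AMD_CPPC_REQ, req);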
+/* AMD Performance Counter Global Status and Control MSRs */
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
+#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+
+/* AMD Last Branch Record MSRs */
+#define MSR_AMD64_LBR_SELECT 0xc000010e
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
@@ -650,6 +760,12 @@
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
+/* AMD Branch Sampling configuration */
+#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
+#define MSR_AMD_SAMP_BR_FROM 0xc0010300
+
+#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6)
+
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
@@ -687,12 +803,14 @@
#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26)
#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25)
/* Thermal Thresholds Support */
#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
@@ -921,6 +1039,7 @@
#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
#define MSR_IA32_VMX_VMFUNC 0x00000491
+#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492
/* VMX_BASIC bits and bitmasks */
#define VMX_BASIC_VMCS_SIZE_SHIFT 32
@@ -941,4 +1060,16 @@
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
+/* Hardware Feedback Interface */
+#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1
+
+/* x2APIC locked status */
+#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD
+#define LEGACY_XAPIC_DISABLED BIT(0) /*
+ * x2APIC mode is locked and
+ * disabling x2APIC will cause
+ * a #GP
+ */
+
#endif /* _ASM_X86_MSR_INDEX_H */
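The new AMD_CPPC_* accessors above split MSR_AMD_CPPC_CAP1 into four 8-bit performance levels and build MSR_AMD_CPPC_REQ from four 8-bit request fields. A minimal userspace sketch of that bit layout follows; the macros are copied from the hunk above, and the sample register contents are invented purely for illustration.

#include <stdio.h>
#include <stdint.h>

/* Field extractors for MSR_AMD_CPPC_CAP1, copied from the patch above. */
#define AMD_CPPC_LOWEST_PERF(x)		(((x) >> 0) & 0xff)
#define AMD_CPPC_LOWNONLIN_PERF(x)	(((x) >> 8) & 0xff)
#define AMD_CPPC_NOMINAL_PERF(x)	(((x) >> 16) & 0xff)
#define AMD_CPPC_HIGHEST_PERF(x)	(((x) >> 24) & 0xff)

/* Field builders for MSR_AMD_CPPC_REQ, copied from the patch above. */
#define AMD_CPPC_MAX_PERF(x)		(((x) & 0xff) << 0)
#define AMD_CPPC_MIN_PERF(x)		(((x) & 0xff) << 8)
#define AMD_CPPC_DES_PERF(x)		(((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x)	(((x) & 0xff) << 24)

int main(void)
{
	/* Invented CAP1 contents: lowest=10, low-nonlinear=40, nominal=120, highest=255. */
	uint64_t cap1 = 10u | (40u << 8) | (120u << 16) | (255u << 24);

	printf("lowest=%llu nominal=%llu highest=%llu\n",
	       (unsigned long long)AMD_CPPC_LOWEST_PERF(cap1),
	       (unsigned long long)AMD_CPPC_NOMINAL_PERF(cap1),
	       (unsigned long long)AMD_CPPC_HIGHEST_PERF(cap1));

	/* Request: min=10, max=255, desired=0 (autonomous), EPP=128 (balanced). */
	uint64_t req = AMD_CPPC_MAX_PERF(255u) | AMD_CPPC_MIN_PERF(10u) |
		       AMD_CPPC_DES_PERF(0u) | AMD_CPPC_ENERGY_PERF_PREF(128u);

	printf("MSR_AMD_CPPC_REQ value = 0x%llx\n", (unsigned long long)req);
	return 0;
}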
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 6b52182e178a..65ec1965cd28 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -10,16 +10,7 @@
#include <asm/errno.h>
#include <asm/cpumask.h>
#include <uapi/asm/msr.h>
-
-struct msr {
- union {
- struct {
- u32 l;
- u32 h;
- };
- u64 q;
- };
-};
+#include <asm/shared/msr.h>
struct msr_info {
u32 msr_no;
@@ -137,17 +128,11 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
{
DECLARE_ARGS(val, low, high);
- asm volatile("2: rdmsr ; xor %[err],%[err]\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: mov %[fault],%[err]\n\t"
- "xorl %%eax, %%eax\n\t"
- "xorl %%edx, %%edx\n\t"
- "jmp 1b\n\t"
- ".previous\n\t"
- _ASM_EXTABLE(2b, 3b)
+ asm volatile("1: rdmsr ; xor %[err],%[err]\n"
+ "2:\n\t"
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
: [err] "=r" (*err), EAX_EDX_RET(val, low, high)
- : "c" (msr), [fault] "i" (-EIO));
+ : "c" (msr));
if (tracepoint_enabled(read_msr))
do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
return EAX_EDX_VAL(val, low, high);
@@ -169,15 +154,11 @@ native_write_msr_safe(unsigned int msr, u32 low, u32 high)
{
int err;
- asm volatile("2: wrmsr ; xor %[err],%[err]\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: mov %[fault],%[err] ; jmp 1b\n\t"
- ".previous\n\t"
- _ASM_EXTABLE(2b, 3b)
+ asm volatile("1: wrmsr ; xor %[err],%[err]\n"
+ "2:\n\t"
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
: [err] "=a" (err)
- : "c" (msr), "0" (low), "d" (high),
- [fault] "i" (-EIO)
+ : "c" (msr), "0" (low), "d" (high)
: "memory");
if (tracepoint_enabled(write_msr))
do_trace_write_msr(msr, ((u64)high << 32 | low), err);
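As the tracepoint calls above show, rdmsr/wrmsr move a 64-bit MSR value through the EDX:EAX register pair, and the reworked safe variants report -EIO via the exception table instead of a .fixup stub. A tiny userspace sketch of just the low/high split convention follows; it models only the arithmetic, not the privileged instructions or the fault handling.

#include <stdio.h>
#include <stdint.h>

/* Split a 64-bit MSR value into the EAX (low) / EDX (high) halves used by wrmsr. */
static void msr_split(uint64_t val, uint32_t *low, uint32_t *high)
{
	*low = (uint32_t)val;
	*high = (uint32_t)(val >> 32);
}

/* Recombine, mirroring the "(u64)high << 32 | low" expression used by the tracepoints. */
static uint64_t msr_join(uint32_t low, uint32_t high)
{
	return (uint64_t)high << 32 | low;
}

int main(void)
{
	uint32_t lo, hi;
	uint64_t v = 0x123456789abcdef0ULL;

	msr_split(v, &lo, &hi);
	printf("low=0x%x high=0x%x roundtrip ok=%d\n", lo, hi, msr_join(lo, hi) == v);
	return 0;
}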
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 829df26fd7a3..76d726074c16 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,8 +24,8 @@
#define _ASM_X86_MTRR_H
#include <uapi/asm/mtrr.h>
-#include <asm/memtype.h>
+void mtrr_bp_init(void);
/*
* The following functions are for use by other drivers that cannot use
@@ -43,7 +43,6 @@ extern int mtrr_del(int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
extern void mtrr_ap_init(void);
-extern void mtrr_bp_init(void);
extern void set_mtrr_aps_delayed_init(void);
extern void mtrr_aps_init(void);
extern void mtrr_bp_restore(void);
@@ -84,11 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
-static inline void mtrr_bp_init(void)
-{
- pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
-}
-
#define mtrr_ap_init() do {} while (0)
#define set_mtrr_aps_delayed_init() do {} while (0)
#define mtrr_aps_init() do {} while (0)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 29dd27b5a339..3a8fdf881313 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -13,6 +13,7 @@
#define MWAIT_SUBSTATE_SIZE 4
#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
+#define MWAIT_C1_SUBSTATE_MASK 0xf0
#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 1cb9c17a4cb4..5c5f1e56c404 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -47,6 +47,7 @@ struct nmiaction {
#define register_nmi_handler(t, fn, fg, n, init...) \
({ \
static struct nmiaction init fn##_na = { \
+ .list = LIST_HEAD_INIT(fn##_na.list), \
.handler = (fn), \
.name = (n), \
.flags = (fg), \
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index cc74dc584836..c936ce9f0c47 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -11,6 +11,7 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
+#include <asm/percpu.h>
#define RETPOLINE_THUNK_SIZE 32
@@ -34,32 +35,57 @@
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
+ call 772f; \
+ int3; \
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
* Google experimented with loop-unrolling and this turned out to be
* the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 772f; \
-773: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 774f; \
-775: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- add $(BITS_PER_LONG/8) * 2, sp; \
- dec reg; \
- jnz 771b;
+#ifdef CONFIG_X86_64
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ mov $(nr/2), reg; \
+771: \
+ __FILL_RETURN_SLOT \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
+ dec reg; \
+ jnz 771b; \
+ /* barrier for jnz misprediction */ \
+ lfence;
+#else
+/*
+ * i386 doesn't unconditionally have LFENCE, as such it can't
+ * do a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ .rept nr; \
+ __FILL_RETURN_SLOT; \
+ .endr; \
+ add $(BITS_PER_LONG/8) * nr, %_ASM_SP;
+#endif
+
+/*
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
+ */
+#define __FILL_ONE_RETURN \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8), %_ASM_SP; \
+ lfence;
#ifdef __ASSEMBLY__
@@ -76,25 +102,54 @@
.endm
/*
+ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
+ * vs RETBleed validation.
+ */
+#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
+
+/*
+ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
+ * eventually turn into its own annotation.
+ */
+.macro ANNOTATE_UNRET_END
+#ifdef CONFIG_DEBUG_ENTRY
+ ANNOTATE_RETPOLINE_SAFE
+ nop
+#endif
+.endm
+
+/*
+ * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
+ * to the retpoline thunk with a CS prefix when the register requires
+ * a REX prefix byte to encode. Also see apply_retpolines().
+ */
+.macro __CS_PREFIX reg:req
+ .irp rs,r8,r9,r10,r11,r12,r13,r14,r15
+ .ifc \reg,\rs
+ .byte 0x2e
+ .endif
+ .endr
+.endm
+
+/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* attack.
*/
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+ __CS_PREFIX \reg
+ jmp __x86_indirect_thunk_\reg
#else
jmp *%\reg
+ int3
#endif
.endm
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
- __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
+ __CS_PREFIX \reg
+ call __x86_indirect_thunk_\reg
#else
call *%\reg
#endif
@@ -104,11 +159,37 @@
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
-#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+ __stringify(__FILL_ONE_RETURN), \ftr2
+
.Lskip_rsb_\@:
+.endm
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
+#else
+#define CALL_ZEN_UNTRAIN_RET ""
+#endif
+
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * While zen_untrain_ret() doesn't clobber anything but requires stack,
+ * entry_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+ ANNOTATE_UNRET_END
+ ALTERNATIVE_2 "", \
+ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
#endif
.endm
@@ -120,17 +201,20 @@
_ASM_PTR " 999b\n\t" \
".popsection\n\t"
-#ifdef CONFIG_RETPOLINE
-
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+
+extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
+extern void entry_ibpb(void);
+
+#ifdef CONFIG_RETPOLINE
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
-extern retpoline_thunk_t __x86_indirect_thunk_array[];
-
#ifdef CONFIG_X86_64
/*
@@ -146,7 +230,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+ X86_FEATURE_RETPOLINE_LFENCE)
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
@@ -176,7 +260,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+ X86_FEATURE_RETPOLINE_LFENCE)
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
@@ -188,9 +272,12 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
SPECTRE_V2_NONE,
- SPECTRE_V2_RETPOLINE_GENERIC,
- SPECTRE_V2_RETPOLINE_AMD,
- SPECTRE_V2_IBRS_ENHANCED,
+ SPECTRE_V2_RETPOLINE,
+ SPECTRE_V2_LFENCE,
+ SPECTRE_V2_EIBRS,
+ SPECTRE_V2_EIBRS_RETPOLINE,
+ SPECTRE_V2_EIBRS_LFENCE,
+ SPECTRE_V2_IBRS,
};
/* The indirect branch speculation control variants */
@@ -233,6 +320,9 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
+DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
+extern void write_spec_ctrl_current(u64 val, bool force);
+extern u64 spec_ctrl_current(void);
/*
* With retpoline, we must use IBRS to restrict branch prediction
@@ -242,18 +332,18 @@ extern u64 x86_spec_ctrl_base;
*/
#define firmware_restrict_branch_speculation_start() \
do { \
- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
- \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
+ X86_FEATURE_USE_IBPB_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- u64 val = x86_spec_ctrl_base; \
- \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current(), \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
@@ -267,6 +357,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+
#include <asm/segment.h>
/**
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 4d5810c8fab7..9cc82f305f4b 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -71,6 +71,16 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
extern bool __virt_addr_valid(unsigned long kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr))
+static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return __canonical_address(vaddr, vaddr_bits) == vaddr;
+}
+
#endif /* __ASSEMBLY__ */
#include <asm-generic/memory_model.h>
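The helpers added to page.h canonicalize a virtual address by sign-extending from the given width. A standalone sketch exercising them in userspace is below; the function bodies are copied from the hunk above, and 48 is used as an assumed vaddr_bits value (4-level paging).

#include <stdio.h>
#include <stdint.h>

/*
 * Copied from the patch above: sign-extend bit (vaddr_bits - 1) into the upper
 * bits. Like the kernel original, this relies on arithmetic right shifts of
 * signed values, which holds for GCC/Clang on x86.
 */
static inline uint64_t __canonical_address(uint64_t vaddr, uint8_t vaddr_bits)
{
	return ((int64_t)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
}

static inline int __is_canonical_address(uint64_t vaddr, uint8_t vaddr_bits)
{
	return __canonical_address(vaddr, vaddr_bits) == vaddr;
}

int main(void)
{
	/* With 48 valid bits, bit 47 must be copied into bits 63:48. */
	uint64_t user = 0x00007fffffffffffULL;	/* top of the lower half: canonical */
	uint64_t kern = 0x0000800000000000ULL;	/* bit 47 set, upper bits clear: not canonical */

	printf("user canonical: %d\n", __is_canonical_address(user, 48));
	printf("kern canonical: %d\n", __is_canonical_address(kern, 48));
	printf("kern canonicalized: 0x%llx\n",
	       (unsigned long long)__canonical_address(kern, 48));
	return 0;
}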
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index b13f8488ac85..df42f8aa99e4 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -19,19 +19,6 @@ extern unsigned long __phys_addr(unsigned long);
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif /* CONFIG_FLATMEM */
-#ifdef CONFIG_X86_USE_3DNOW
-#include <asm/mmx.h>
-
-static inline void clear_page(void *page)
-{
- mmx_clear_page(page);
-}
-
-static inline void copy_page(void *to, void *from)
-{
- mmx_copy_page(to, from);
-}
-#else /* !CONFIG_X86_USE_3DNOW */
#include <linux/string.h>
static inline void clear_page(void *page)
@@ -43,7 +30,6 @@ static inline void copy_page(void *to, void *from)
{
memcpy(to, from, PAGE_SIZE);
}
-#endif /* CONFIG_X86_USE_3DNOW */
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PAGE_32_H */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4bde0dc66100..198e03e59ca1 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -5,8 +5,11 @@
#include <asm/page_64_types.h>
#ifndef __ASSEMBLY__
+#include <asm/cpufeatures.h>
#include <asm/alternative.h>
+#include <linux/kmsan-checks.h>
+
/* duplicated to the one in bootmem.h */
extern unsigned long max_pfn;
extern unsigned long phys_base;
@@ -15,7 +18,7 @@ extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
-static inline unsigned long __phys_addr_nodebug(unsigned long x)
+static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - __START_KERNEL_map;
@@ -46,6 +49,11 @@ void clear_page_erms(void *page);
static inline void clear_page(void *page)
{
+ /*
+ * Clean up KMSAN metadata for the page being cleared. The assembly call
+ * below clobbers @page, so we perform unpoisoning before it.
+ */
+ kmsan_unpoison_memory(page, PAGE_SIZE);
alternative_call_2(clear_page_orig,
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 21c4a694ca11..2a0b8dd4ec33 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -666,12 +666,13 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
".globl " PV_THUNK_NAME(func) ";" \
".type " PV_THUNK_NAME(func) ", @function;" \
PV_THUNK_NAME(func) ":" \
+ ASM_ENDBR \
FRAME_BEGIN \
PV_SAVE_ALL_CALLER_REGS \
"call " #func ";" \
PV_RESTORE_ALL_CALLER_REGS \
FRAME_END \
- "ret;" \
+ ASM_RET \
".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
".popsection")
@@ -742,6 +743,7 @@ extern void default_banner(void);
word 771b; \
.byte ptype; \
.byte 772b-771b; \
+ _ASM_ALIGN; \
.popsection
@@ -752,11 +754,6 @@ extern void default_banner(void);
#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)
-#define INTERRUPT_RETURN \
- ANNOTATE_RETPOLINE_SAFE; \
- ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);", \
- X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;")
-
#ifdef CONFIG_DEBUG_ENTRY
.macro PARA_IRQ_save_fl
PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),
diff --git a/arch/x86/include/asm/paravirt_api_clock.h b/arch/x86/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/x86/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index a69012e1903f..f3d601574730 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -272,14 +272,13 @@ struct paravirt_patch_template {
extern struct pv_info pv_info;
extern struct paravirt_patch_template pv_ops;
-extern void (*paravirt_iret)(void);
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
#define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "i" (&(pv_ops.op))
+ [paravirt_opptr] "m" (pv_ops.op)
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
@@ -295,6 +294,7 @@ extern void (*paravirt_iret)(void);
" .byte " type "\n" \
" .byte 772b-771b\n" \
" .short " clobber "\n" \
+ _ASM_ALIGN "\n" \
".popsection\n"
/* Generate patchable code, with the default asm parameters. */
@@ -316,7 +316,7 @@ int paravirt_disable_iospace(void);
*/
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
- "call *%c[paravirt_opptr];"
+ "call *%[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
@@ -329,7 +329,7 @@ int paravirt_disable_iospace(void);
* Unfortunately, this is a relatively slow operation for modern CPUs,
* because it cannot necessarily determine what the destination
* address is. In this case, the address is a runtime constant, so at
- * the very least we can patch the call to e a simple direct call, or
+ * the very least we can patch the call to a simple direct call, or,
* ideally, patch an inline implementation into the callsite. (Direct
* calls are essentially free, because the call and return addresses
* are completely predictable.)
@@ -340,10 +340,10 @@ int paravirt_disable_iospace(void);
* on the stack. All caller-save registers (eax,edx,ecx) are expected
* to be modified (either clobbered or used for return values).
* X86_64, on the other hand, already specifies a register-based calling
- * conventions, returning at %rax, with parameters going on %rdi, %rsi,
+ * conventions, returning at %rax, with parameters going in %rdi, %rsi,
* %rdx, and %rcx. Note that for this reason, x86_64 does not need any
* special handling for dealing with 4 arguments, unlike i386.
- * However, x86_64 also have to clobber all caller saved registers, which
+ * However, x86_64 also has to clobber all caller saved registers, which
* unfortunately, are quite a bit (r8 - r11)
*
* The call instruction itself is marked by placing its start address
@@ -361,22 +361,22 @@ int paravirt_disable_iospace(void);
* There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
* It could be extended to more arguments, but there would be little
* to be gained from that. For each number of arguments, there are
- * the two VCALL and CALL variants for void and non-void functions.
+ * two VCALL and CALL variants for void and non-void functions.
*
* When there is a return value, the invoker of the macro must specify
* the return type. The macro then uses sizeof() on that type to
- * determine whether its a 32 or 64 bit value, and places the return
+ * determine whether it's a 32 or 64 bit value and places the return
* in the right register(s) (just %eax for 32-bit, and %edx:%eax for
- * 64-bit). For x86_64 machines, it just returns at %rax regardless of
+ * 64-bit). For x86_64 machines, it just returns in %rax regardless of
* the return value size.
*
- * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
+ * 64-bit arguments are passed as a pair of adjacent 32-bit arguments;
* i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments
* in low,high order
*
* Small structures are passed and returned in registers. The macro
* calling convention can't directly deal with this, so the wrapper
- * functions must do this.
+ * functions must do it.
*
* These PVOP_* macros are only defined within this header. This
* means that all uses must be wrapped in inline functions. This also
@@ -415,8 +415,17 @@ int paravirt_disable_iospace(void);
"=c" (__ecx)
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
-/* void functions are still allowed [re]ax for scratch */
+/*
+ * void functions are still allowed [re]ax for scratch.
+ *
+ * The ZERO_CALL_USED_REGS feature may end up zeroing out callee-saved
+ * registers. Make sure we model this with the appropriate clobbers.
+ */
+#ifdef CONFIG_ZERO_CALL_USED_REGS
+#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), PVOP_VCALL_CLOBBERS
+#else
#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
+#endif
#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index d2c76c8d8cfd..736793d65bcb 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -105,9 +105,6 @@ static inline void early_quirks(void) { }
extern void pci_iommu_alloc(void);
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
#ifdef CONFIG_NUMA
/* Returns the node based on pci bus */
static inline int __pcibus_to_node(const struct pci_bus *bus)
@@ -135,7 +132,7 @@ struct pci_setup_rom {
unsigned long bus;
unsigned long device;
unsigned long function;
- uint8_t romdata[0];
+ uint8_t romdata[];
};
#endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 490411dba438..70533fdcbf02 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -5,7 +5,10 @@
* (c) 1999 Martin Mares <mj@ucw.cz>
*/
+#include <linux/errno.h>
+#include <linux/init.h>
#include <linux/ioport.h>
+#include <linux/spinlock.h>
#undef DEBUG
@@ -39,6 +42,8 @@ do { \
#define PCI_ROOT_NO_CRS 0x100000
#define PCI_NOASSIGN_BARS 0x200000
#define PCI_BIG_ROOT_WINDOW 0x400000
+#define PCI_USE_E820 0x800000
+#define PCI_NO_E820 0x1000000
extern unsigned int pci_probe;
extern unsigned long pirq_table_addr;
@@ -64,6 +69,8 @@ void pcibios_scan_specific_bus(int busn);
/* pci-irq.c */
+struct pci_dev;
+
struct irq_info {
u8 bus, devfn; /* Bus, device and function */
struct {
@@ -87,7 +94,16 @@ struct irq_routing_table {
u32 miniport_data; /* Crap */
u8 rfu[11];
u8 checksum; /* Modulo 256 checksum must give 0 */
- struct irq_info slots[0];
+ struct irq_info slots[];
+} __attribute__((packed));
+
+struct irt_routing_table {
+ u32 signature; /* IRT_SIGNATURE should be here */
+ u8 size; /* Number of entries provided */
+ u8 used; /* Number of entries actually used */
+ u16 exclusive_irqs; /* IRQs devoted exclusively to
+ PCI usage */
+ struct irq_info slots[];
} __attribute__((packed));
extern unsigned int pcibios_irq_mask;
@@ -232,3 +248,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
# define x86_default_pci_init_irq NULL
# define x86_default_pci_fixup_irqs NULL
#endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_ACPI)
+extern bool pci_use_e820;
+#else
+#define pci_use_e820 false
+#endif
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a3c33b79fb86..13c0d63ed55e 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -38,9 +38,9 @@
#define arch_raw_cpu_ptr(ptr) \
({ \
unsigned long tcp_ptr__; \
- asm volatile("add " __percpu_arg(1) ", %0" \
- : "=r" (tcp_ptr__) \
- : "m" (this_cpu_off), "0" (ptr)); \
+ asm ("add " __percpu_arg(1) ", %0" \
+ : "=r" (tcp_ptr__) \
+ : "m" (this_cpu_off), "0" (ptr)); \
(typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
})
#else
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc1b5003713..9ac46dbe57d4 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -2,12 +2,14 @@
#ifndef _ASM_X86_PERF_EVENT_H
#define _ASM_X86_PERF_EVENT_H
+#include <linux/static_call.h>
+
/*
* Performance event hw details:
*/
#define INTEL_PMC_MAX_GENERIC 32
-#define INTEL_PMC_MAX_FIXED 4
+#define INTEL_PMC_MAX_FIXED 16
#define INTEL_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64
@@ -87,6 +89,19 @@
#define AMD64_RAW_EVENT_MASK_NB \
(AMD64_EVENTSEL_EVENT | \
ARCH_PERFMON_EVENTSEL_UMASK)
+
+#define AMD64_PERFMON_V2_EVENTSEL_EVENT_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ GENMASK_ULL(37, 36))
+
+#define AMD64_PERFMON_V2_EVENTSEL_UMASK_NB \
+ (ARCH_PERFMON_EVENTSEL_UMASK | \
+ GENMASK_ULL(27, 24))
+
+#define AMD64_PERFMON_V2_RAW_EVENT_MASK_NB \
+ (AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \
+ AMD64_PERFMON_V2_EVENTSEL_UMASK_NB)
+
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
#define AMD64_NUM_COUNTERS_NB 4
@@ -184,6 +199,22 @@ union cpuid28_ecx {
unsigned int full;
};
+/*
+ * AMD "Extended Performance Monitoring and Debug" CPUID
+ * detection/enumeration details:
+ */
+union cpuid_0x80000022_ebx {
+ struct {
+ /* Number of Core Performance Counters */
+ unsigned int num_core_pmc:4;
+ /* Number of available LBR Stack Entries */
+ unsigned int lbr_v2_stack_sz:6;
+ /* Number of Data Fabric Counters */
+ unsigned int num_df_pmc:6;
+ } split;
+ unsigned int full;
+};
+
struct x86_pmu_capability {
int version;
int num_counters_gp;
@@ -192,6 +223,7 @@ struct x86_pmu_capability {
int bit_width_fixed;
unsigned int events_mask;
int events_mask_len;
+ unsigned int pebs_ept :1;
};
/*
@@ -241,6 +273,11 @@ struct x86_pmu_capability {
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
+static inline bool use_fixed_pseudo_encoding(u64 code)
+{
+ return !(code & 0xff);
+}
+
/*
* We model BTS tracing as another fixed-mode PMC.
*
@@ -366,6 +403,11 @@ struct pebs_xmm {
};
/*
+ * AMD Extended Performance Monitoring and Debug cpuid feature detection
+ */
+#define EXT_PERFMON_DEBUG_FEATURES 0x80000022
+
+/*
* IBS cpuid feature detection
*/
@@ -386,6 +428,7 @@ struct pebs_xmm {
#define IBS_CAPS_OPBRNFUSE (1U<<8)
#define IBS_CAPS_FETCHCTLEXTD (1U<<9)
#define IBS_CAPS_OPDATA4 (1U<<10)
+#define IBS_CAPS_ZEN4 (1U<<11)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
@@ -399,6 +442,7 @@ struct pebs_xmm {
#define IBSCTL_LVT_OFFSET_MASK 0x0F
/* IBS fetch bits/masks */
+#define IBS_FETCH_L3MISSONLY (1ULL<<59)
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
@@ -415,6 +459,7 @@ struct pebs_xmm {
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
+#define IBS_OP_L3MISSONLY (1ULL<<16)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
@@ -477,6 +522,7 @@ struct x86_pmu_lbr {
};
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
+extern u64 perf_get_hw_event_config(int hw_event);
extern void perf_check_microcode(void);
extern void perf_clear_dirty_counters(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
@@ -486,15 +532,20 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
memset(cap, 0, sizeof(*cap));
}
+static inline u64 perf_get_hw_event_config(int hw_event)
+{
+ return 0;
+}
+
static inline void perf_events_lapic_init(void) { }
static inline void perf_check_microcode(void) { }
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
-extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
#else
-struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
return -1;
@@ -513,6 +564,27 @@ static inline void intel_pt_handle_vmx(int on)
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
extern void amd_pmu_enable_virt(void);
extern void amd_pmu_disable_virt(void);
+
+#if defined(CONFIG_PERF_EVENTS_AMD_BRS)
+
+#define PERF_NEEDS_LOPWR_CB 1
+
+/*
+ * architectural low power callback impacts
+ * drivers/acpi/processor_idle.c
+ * drivers/acpi/acpi_pad.c
+ */
+extern void perf_amd_brs_lopwr_cb(bool lopwr_in);
+
+DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
+
+static inline void perf_lopwr_cb(bool lopwr_in)
+{
+ static_call_mod(perf_lopwr_cb)(lopwr_in);
+}
+
+#endif /* PERF_NEEDS_LOPWR_CB */
+
#else
static inline void amd_pmu_enable_virt(void) { }
static inline void amd_pmu_disable_virt(void) { }
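union cpuid_0x80000022_ebx above packs three counter-count fields into EBX of CPUID leaf 0x80000022. A userspace sketch decoding a sample EBX value through the same union follows; the bit-field ordering assumes the usual GCC/Clang little-endian layout the kernel relies on, and the sample value is invented.

#include <stdio.h>

/* Copied from the patch above (kernel types replaced with plain unsigned int). */
union cpuid_0x80000022_ebx {
	struct {
		/* Number of Core Performance Counters */
		unsigned int num_core_pmc:4;
		/* Number of available LBR Stack Entries */
		unsigned int lbr_v2_stack_sz:6;
		/* Number of Data Fabric Counters */
		unsigned int num_df_pmc:6;
	} split;
	unsigned int full;
};

int main(void)
{
	union cpuid_0x80000022_ebx ebx;

	/* Invented sample: 6 core PMCs, 16 LBRv2 entries, 4 DF counters. */
	ebx.full = 6u | (16u << 4) | (4u << 10);

	printf("core PMCs: %u, LBRv2 depth: %u, DF PMCs: %u\n",
	       ebx.split.num_core_pmc,
	       ebx.split.lbr_v2_stack_sz,
	       ebx.split.num_df_pmc);
	return 0;
}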
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index e896ebef8c24..28421a887209 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -256,10 +256,10 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
/* We always extract/encode the offset by shifting it all the way up, and then down again */
#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
-#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
-#define __swp_type(x) (((x).val) & 0x1f)
-#define __swp_offset(x) ((x).val >> 5)
-#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+#define __swp_type(x) (((x).val) & ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x) ((x).val >> SWP_TYPE_BITS)
+#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << SWP_TYPE_BITS})
/*
* Normally, __swp_entry() converts from arch-independent swp_entry_t to
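The PAE swap-entry macros now derive their field widths from SWP_TYPE_BITS instead of the hard-coded 0x1f/5 split they replace. A small sketch of the pack/unpack round trip is below; it assumes SWP_TYPE_BITS is 5, matching the constants being replaced, and models swp_entry_t as a plain unsigned long wrapper.

#include <stdio.h>

#define SWP_TYPE_BITS	5	/* assumed, matching the 0x1f/5 split being replaced */

typedef struct { unsigned long val; } swp_entry_t;

/* Copied from the patch above. */
#define __swp_type(x)		(((x).val) & ((1UL << SWP_TYPE_BITS) - 1))
#define __swp_offset(x)		((x).val >> SWP_TYPE_BITS)
#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << SWP_TYPE_BITS})

int main(void)
{
	/* Pack swap type 3 and page offset 0x1234, then unpack them again. */
	swp_entry_t e = __swp_entry(3UL, 0x1234UL);

	printf("raw=0x%lx type=%lu offset=0x%lx\n",
	       e.val, __swp_type(e), __swp_offset(e));
	return 0;
}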
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a34430b7af4a..5059799bebe3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -15,18 +15,14 @@
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \
: (prot))
-/*
- * Macros to add or remove encryption attribute
- */
-#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot)))
-#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))
-
#ifndef __ASSEMBLY__
#include <linux/spinlock.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
#include <asm/fpu/api.h>
+#include <asm/coco.h>
#include <asm-generic/pgtable_uffd.h>
+#include <linux/page_table_check.h>
extern pgd_t early_top_pgt[PTRS_PER_PGD];
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@ -37,6 +33,12 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
void ptdump_walk_pgd_level_checkwx(void);
void ptdump_walk_user_pgd_level_checkwx(void);
+/*
+ * Macros to add or remove encryption attribute
+ */
+#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot)))
+#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot)))
+
#ifdef CONFIG_DEBUG_WX
#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
#define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx()
@@ -647,11 +649,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
-{
- return canon_pgprot(prot);
-}
-
static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
enum page_cache_mode pcm,
enum page_cache_mode new_pcm)
@@ -753,7 +750,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
return true;
if ((pte_flags(a) & _PAGE_PROTNONE) &&
- mm_tlb_flush_pending(mm))
+ atomic_read(&mm->tlb_flush_pending))
return true;
return false;
@@ -818,7 +815,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) !=
+ (_KERNPG_TABLE & ~_PAGE_ACCESSED);
}
static inline unsigned long pages_to_mb(unsigned long npg)
@@ -1007,18 +1005,21 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
+ page_table_check_pte_set(mm, addr, ptep, pte);
set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
+ page_table_check_pud_set(mm, addr, pudp, pud);
native_set_pud(pudp, pud);
}
@@ -1049,6 +1050,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, addr, pte);
return pte;
}
@@ -1064,6 +1066,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
* care about updates and native needs no locking
*/
pte = native_local_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, addr, pte);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
@@ -1110,14 +1113,22 @@ static inline int pmd_write(pmd_t pmd)
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
{
- return native_pmdp_get_and_clear(pmdp);
+ pmd_t pmd = native_pmdp_get_and_clear(pmdp);
+
+ page_table_check_pmd_clear(mm, addr, pmd);
+
+ return pmd;
}
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- return native_pudp_get_and_clear(pudp);
+ pud_t pud = native_pudp_get_and_clear(pudp);
+
+ page_table_check_pud_clear(mm, addr, pud);
+
+ return pud;
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -1138,6 +1149,7 @@ static inline int pud_write(pud_t pud)
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
if (IS_ENABLED(CONFIG_SMP)) {
return xchg(pmdp, pmd);
} else {
@@ -1147,6 +1159,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
}
#endif
+
+#define __HAVE_ARCH_PMDP_INVALIDATE_AD
+extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
@@ -1265,6 +1282,23 @@ static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
{
}
+#ifdef _PAGE_SWP_EXCLUSIVE
+#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE);
+}
+#endif /* _PAGE_SWP_EXCLUSIVE */
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
@@ -1398,12 +1432,29 @@ static inline bool arch_has_pfn_modify_check(void)
return boot_cpu_has_bug(X86_BUG_L1TF);
}
-#define arch_faults_on_old_pte arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
+#define arch_has_hw_pte_young arch_has_hw_pte_young
+static inline bool arch_has_hw_pte_young(void)
{
- return false;
+ return true;
+}
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER);
}
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER);
+}
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 56d0399a0cd1..e479491da8d5 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -186,7 +186,7 @@ static inline void native_pgd_clear(pgd_t *pgd)
*
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
- * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|F|SD|0| <- swp entry
+ * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| E|F|SD|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
@@ -203,6 +203,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
* F (2) in swp entry is used to record when a pagetable is
* writeprotected by userfaultfd WP support.
*
+ * E (3) in swp entry is used to remember PG_anon_exclusive.
+ *
* Bit 7 in swp entry should be 0 because pmd_present checks not only P,
* but also L and G.
*
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 91ac10654570..04f36063ad54 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -139,7 +139,52 @@ extern unsigned int ptrs_per_p4d;
# define VMEMMAP_START __VMEMMAP_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
-#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+/*
+ * End of the region for which vmalloc page tables are pre-allocated.
+ * For non-KMSAN builds, this is the same as VMALLOC_END.
+ * For KMSAN builds, VMALLOC_START..VMEMORY_END is 4 times bigger than
+ * VMALLOC_START..VMALLOC_END (see below).
+ */
+#define VMEMORY_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+
+#ifndef CONFIG_KMSAN
+#define VMALLOC_END VMEMORY_END
+#else
+/*
+ * In KMSAN builds vmalloc area is four times smaller, and the remaining 3/4
+ * are used to keep the metadata for virtual pages. The memory formerly
+ * belonging to vmalloc area is now laid out as follows:
+ *
+ * 1st quarter: VMALLOC_START to VMALLOC_END - new vmalloc area
+ * 2nd quarter: KMSAN_VMALLOC_SHADOW_START to
+ * VMALLOC_END+KMSAN_VMALLOC_SHADOW_OFFSET - vmalloc area shadow
+ * 3rd quarter: KMSAN_VMALLOC_ORIGIN_START to
+ * VMALLOC_END+KMSAN_VMALLOC_ORIGIN_OFFSET - vmalloc area origins
+ * 4th quarter: KMSAN_MODULES_SHADOW_START to KMSAN_MODULES_ORIGIN_START
+ * - shadow for modules,
+ * KMSAN_MODULES_ORIGIN_START to
+ * KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules.
+ */
+#define VMALLOC_QUARTER_SIZE ((VMALLOC_SIZE_TB << 40) >> 2)
+#define VMALLOC_END (VMALLOC_START + VMALLOC_QUARTER_SIZE - 1)
+
+/*
+ * vmalloc metadata addresses are calculated by adding shadow/origin offsets
+ * to vmalloc address.
+ */
+#define KMSAN_VMALLOC_SHADOW_OFFSET VMALLOC_QUARTER_SIZE
+#define KMSAN_VMALLOC_ORIGIN_OFFSET (VMALLOC_QUARTER_SIZE << 1)
+
+#define KMSAN_VMALLOC_SHADOW_START (VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET)
+#define KMSAN_VMALLOC_ORIGIN_START (VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET)
+
+/*
+ * The shadow/origin for modules are placed one by one in the last 1/4 of
+ * vmalloc space.
+ */
+#define KMSAN_MODULES_SHADOW_START (VMALLOC_END + KMSAN_VMALLOC_ORIGIN_OFFSET + 1)
+#define KMSAN_MODULES_ORIGIN_START (KMSAN_MODULES_SHADOW_START + MODULES_LEN)
+#endif /* CONFIG_KMSAN */
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
@@ -163,4 +208,9 @@ extern unsigned int ptrs_per_p4d;
#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
+/*
+ * We borrow bit 3 to remember PG_anon_exclusive.
+ */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_PWT
+
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
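The KMSAN comment above carves the former vmalloc range into four equal quarters: vmalloc proper, its shadow, its origins, and module metadata. The address arithmetic can be sanity-checked in userspace; the sketch below assumes VMALLOC_START = 0xffffc90000000000 and VMALLOC_SIZE_TB = 32 (the non-5-level, non-randomized defaults), both of which come from other headers and are only illustrative here.

#include <stdio.h>
#include <stdint.h>

/* Assumed layout constants (defined elsewhere in the kernel, illustrative only). */
#define VMALLOC_START		0xffffc90000000000ULL
#define VMALLOC_SIZE_TB		32ULL

/* Copied from the patch above. */
#define VMALLOC_QUARTER_SIZE		((VMALLOC_SIZE_TB << 40) >> 2)
#define VMALLOC_END			(VMALLOC_START + VMALLOC_QUARTER_SIZE - 1)
#define KMSAN_VMALLOC_SHADOW_OFFSET	VMALLOC_QUARTER_SIZE
#define KMSAN_VMALLOC_ORIGIN_OFFSET	(VMALLOC_QUARTER_SIZE << 1)
#define KMSAN_VMALLOC_SHADOW_START	(VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET)
#define KMSAN_VMALLOC_ORIGIN_START	(VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET)

int main(void)
{
	printf("vmalloc: %#llx - %#llx\n", VMALLOC_START, VMALLOC_END);
	printf("shadow : starts at %#llx\n", KMSAN_VMALLOC_SHADOW_START);
	printf("origin : starts at %#llx\n", KMSAN_VMALLOC_ORIGIN_START);

	/* Metadata for a vmalloc address is found by adding the fixed offsets. */
	unsigned long long p = VMALLOC_START + 0x1000;
	printf("shadow(%#llx) = %#llx\n", p, p + KMSAN_VMALLOC_SHADOW_OFFSET);
	return 0;
}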
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 40497a9020c6..aa174fed3a71 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -110,9 +110,11 @@
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP)
+#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4)
#else
#define _PAGE_NX (_AT(pteval_t, 0))
#define _PAGE_DEVMAP (_AT(pteval_t, 0))
+#define _PAGE_SOFTW4 (_AT(pteval_t, 0))
#endif
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
@@ -228,25 +230,6 @@ enum page_cache_mode {
#endif /* __ASSEMBLY__ */
-/* xwr */
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
-
/*
* early identity mapping pte attrib macros.
*/
@@ -559,10 +542,6 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
-
-struct mm_struct;
-extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
- unsigned int *level);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 1d5f14aff5f6..2e6c04d8a45b 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -41,9 +41,6 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
return __arch_override_mprotect_pkey(vma, prot, pkey);
}
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
@@ -118,11 +115,6 @@ int mm_pkey_free(struct mm_struct *mm, int pkey)
return 0;
}
-extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
- unsigned long init_val);
-
static inline int vma_pkey(struct vm_area_struct *vma)
{
unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h
index 4cd49afa0ca4..74f0a2d34ffd 100644
--- a/arch/x86/include/asm/pkru.h
+++ b/arch/x86/include/asm/pkru.h
@@ -4,8 +4,8 @@
#include <asm/cpufeature.h>
-#define PKRU_AD_BIT 0x1
-#define PKRU_WD_BIT 0x2
+#define PKRU_AD_BIT 0x1u
+#define PKRU_WD_BIT 0x2u
#define PKRU_BITS_PER_PKEY 2
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index fe5efbcba824..5f6daea1ee24 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -108,16 +108,18 @@ static __always_inline bool should_resched(int preempt_offset)
extern asmlinkage void preempt_schedule(void);
extern asmlinkage void preempt_schedule_thunk(void);
-#define __preempt_schedule_func preempt_schedule_thunk
+#define preempt_schedule_dynamic_enabled preempt_schedule_thunk
+#define preempt_schedule_dynamic_disabled NULL
extern asmlinkage void preempt_schedule_notrace(void);
extern asmlinkage void preempt_schedule_notrace_thunk(void);
-#define __preempt_schedule_notrace_func preempt_schedule_notrace_thunk
+#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk
+#define preempt_schedule_notrace_dynamic_disabled NULL
#ifdef CONFIG_PREEMPT_DYNAMIC
-DECLARE_STATIC_CALL(preempt_schedule, __preempt_schedule_func);
+DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);
#define __preempt_schedule() \
do { \
@@ -125,7 +127,7 @@ do { \
asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
} while (0)
-DECLARE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func);
+DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);
#define __preempt_schedule_notrace() \
do { \
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 355d38c0cf60..67c9d73b31fa 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -119,6 +119,8 @@ struct cpuinfo_x86 {
int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
+ /* protected processor identification number */
+ u64 ppin;
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
@@ -557,7 +559,7 @@ static __always_inline void native_swapgs(void)
#endif
}
-static inline unsigned long current_top_of_stack(void)
+static __always_inline unsigned long current_top_of_stack(void)
{
/*
* We can't read directly from tss.sp0: sp0 on x86_32 is special in
@@ -567,7 +569,7 @@ static inline unsigned long current_top_of_stack(void)
return this_cpu_read_stable(cpu_current_top_of_stack);
}
-static inline bool on_thread_stack(void)
+static __always_inline bool on_thread_stack(void)
{
return (unsigned long)(current_top_of_stack() -
current_stack_pointer) < THREAD_SIZE;
@@ -585,9 +587,6 @@ static inline void load_sp0(unsigned long sp0)
#endif /* CONFIG_PARAVIRT_XXL */
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
unsigned long __get_wchan(struct task_struct *p);
/*
@@ -835,7 +834,7 @@ bool xen_set_default_idle(void);
#define xen_set_default_idle 0
#endif
-void stop_this_cpu(void *dummy);
+void __noreturn stop_this_cpu(void *dummy);
void microcode_check(void);
enum l1tf_mitigations {
@@ -855,4 +854,12 @@ enum mds_mitigations {
MDS_MITIGATION_VMWERV,
};
+#ifdef CONFIG_X86_SGX
+int arch_memory_failure(unsigned long pfn, int flags);
+#define arch_memory_failure arch_memory_failure
+
+bool arch_is_platform_page(u64 paddr);
+#define arch_is_platform_page arch_is_platform_page
+#endif
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index feed36d44d04..12ef86b19910 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -13,6 +13,8 @@ void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
void entry_SYSCALL_64_safe_stack(void);
+void entry_SYSRETQ_unsafe_stack(void);
+void entry_SYSRETQ_end(void);
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
@@ -28,6 +30,8 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_SYSCALL_compat_safe_stack(void);
+void entry_SYSRETL_compat_unsafe_stack(void);
+void entry_SYSRETL_compat_end(void);
void entry_INT80_compat(void);
#ifdef CONFIG_XEN_PV
void xen_entry_INT80_compat(void);
@@ -35,11 +39,9 @@ void xen_entry_INT80_compat(void);
#endif
void x86_configure_nx(void);
-void x86_report_nx(void);
extern int reboot_force;
-long do_arch_prctl_common(struct task_struct *task, int option,
- unsigned long arg2);
+long do_arch_prctl_common(int option, unsigned long arg2);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 703663175a5a..f4db78b09c8f 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -137,7 +137,7 @@ static __always_inline int user_mode(struct pt_regs *regs)
#endif
}
-static inline int v8086_mode(struct pt_regs *regs)
+static __always_inline int v8086_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
return (regs->flags & X86_VM_MASK);
@@ -186,9 +186,13 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
+ ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack &&
+ regs->ip < (unsigned long)entry_SYSRETQ_end);
#ifdef CONFIG_IA32_EMULATION
ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack);
+ ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack &&
+ regs->ip < (unsigned long)entry_SYSRETL_compat_end);
#endif
return ret;
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index d86ab942219c..d87451df480b 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -53,6 +53,7 @@ static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
static inline void queued_spin_unlock(struct qspinlock *lock)
{
+ kcsan_release();
pv_queued_spin_unlock(lock);
}
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 159622ee0674..60ece592b220 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -2,6 +2,8 @@
#ifndef __ASM_QSPINLOCK_PARAVIRT_H
#define __ASM_QSPINLOCK_PARAVIRT_H
+#include <asm/ibt.h>
+
/*
* For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
* registers. For i386, however, only 1 32-bit register needs to be saved
@@ -10,7 +12,7 @@
*/
#ifdef CONFIG_64BIT
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
#define __pv_queued_spin_unlock __pv_queued_spin_unlock
#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock"
#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath"
@@ -18,9 +20,10 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
/*
* Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
* which combines the registers saving trunk and the body of the following
- * C code:
+ * C code. Note that it puts the code in the .spinlock.text section which
+ * is equivalent to adding __lockfunc in the C code:
*
- * void __pv_queued_spin_unlock(struct qspinlock *lock)
+ * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
* {
* u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
*
@@ -34,11 +37,12 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
* rsi = lockval (second argument)
* rdx = internal variable (set to 0)
*/
-asm (".pushsection .text;"
+asm (".pushsection .spinlock.text;"
".globl " PV_UNLOCK ";"
".type " PV_UNLOCK ", @function;"
".align 4,0x90;"
PV_UNLOCK ": "
+ ASM_ENDBR
FRAME_BEGIN
"push %rdx;"
"mov $0x1,%eax;"
@@ -48,7 +52,7 @@ asm (".pushsection .text;"
"jne .slowpath;"
"pop %rdx;"
FRAME_END
- "ret;"
+ ASM_RET
".slowpath: "
"push %rsi;"
"movzbl %al,%esi;"
@@ -56,14 +60,14 @@ asm (".pushsection .text;"
"pop %rsi;"
"pop %rdx;"
FRAME_END
- "ret;"
+ ASM_RET
".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
".popsection");
#else /* CONFIG_64BIT */
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text");
#endif /* CONFIG_64BIT */
#endif
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 5db5d083c873..fd6f6e5b755a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -25,6 +25,7 @@ struct real_mode_header {
u32 sev_es_trampoline_start;
#endif
#ifdef CONFIG_X86_64
+ u32 trampoline_start64;
u32 trampoline_pgd;
#endif
/* ACPI S3 wakeup */
@@ -89,6 +90,7 @@ static inline void set_real_mode_mem(phys_addr_t mem)
}
void reserve_real_mode(void);
+void load_trampoline_pgtable(void);
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index b2d504f11937..aff774775c67 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -35,11 +35,7 @@
# define NEED_CMOV 0
#endif
-#ifdef CONFIG_X86_USE_3DNOW
-# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31))
-#else
# define NEED_3DNOW 0
-#endif
#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index d60ed0668a59..d24b04ebf950 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -81,6 +81,15 @@ static void __resctrl_sched_in(void)
}
}
+static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
+{
+ unsigned int scale = boot_cpu_data.x86_cache_occ_scale;
+
+ /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
+ val /= scale;
+ return val * scale;
+}
+
static inline void resctrl_sched_in(void)
{
if (static_branch_likely(&rdt_enable_key))
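resctrl_arch_round_mon_val() above rounds a byte count down to a multiple of the hardware's occupancy counter granularity. A trivial userspace sketch of the same rounding follows, with an assumed scale of 64 KiB standing in for boot_cpu_data.x86_cache_occ_scale.

#include <stdio.h>

/*
 * Assumed counter granularity; the kernel reads this from CPUID into
 * boot_cpu_data.x86_cache_occ_scale.
 */
static unsigned int x86_cache_occ_scale = 65536;

static unsigned int resctrl_arch_round_mon_val(unsigned int val)
{
	unsigned int scale = x86_cache_occ_scale;

	/* h/w works in units of "x86_cache_occ_scale" */
	val /= scale;
	return val * scale;
}

int main(void)
{
	/* 200000 bytes rounds down to 3 * 65536 = 196608. */
	printf("%u -> %u\n", 200000u, resctrl_arch_round_mon_val(200000u));
	return 0;
}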
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 8a9eba191516..7fa611216417 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -11,7 +11,7 @@
#define __CLOBBERS_MEM(clb...) "memory", ## clb
-#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CONFIG_CC_HAS_ASM_GOTO)
+#ifndef __GCC_ASM_FLAG_OUTPUTS__
/* Use asm goto */
@@ -27,7 +27,7 @@ cc_label: c = true; \
c; \
})
-#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */
+#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) */
/* Use flags output or a set instruction */
@@ -40,7 +40,7 @@ cc_label: c = true; \
c; \
})
-#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */
+#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) */
#define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 8dd8e8ec9fa5..2e7890dd58a4 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -4,6 +4,7 @@
#include <linux/const.h>
#include <asm/alternative.h>
+#include <asm/ibt.h>
/*
* Constructor for a conventional segment GDT (or LDT) entry.
@@ -275,7 +276,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
* vector has no error code (two bytes), a 'push $vector_number' (two
* bytes), and a jump to the common entry code (up to five bytes).
*/
-#define EARLY_IDT_HANDLER_SIZE 9
+#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE)
/*
* xen_early_idt_handler_array is for Xen pv guests: for each entry in
@@ -283,7 +284,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
* pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
* max 8 bytes.
*/
-#define XEN_EARLY_IDT_HANDLER_SIZE 8
+#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE)
#ifndef __ASSEMBLY__
@@ -307,14 +308,7 @@ do { \
\
asm volatile(" \n" \
"1: movl %k0,%%" #seg " \n" \
- \
- ".section .fixup,\"ax\" \n" \
- "2: xorl %k0,%k0 \n" \
- " jmp 1b \n" \
- ".previous \n" \
- \
- _ASM_EXTABLE(1b, 2b) \
- \
+ _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k0)\
: "+r" (__val) : : "memory"); \
} while (0)
@@ -356,18 +350,6 @@ static inline void __loadsegment_fs(unsigned short value)
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
-/*
- * x86-32 user GS accessors. This is ugly and could do with some cleaning up.
- */
-#ifdef CONFIG_X86_32
-# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
-# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
-# define task_user_gs(tsk) ((tsk)->thread.gs)
-# define lazy_save_gs(v) savesegment(gs, (v))
-# define lazy_load_gs(v) loadsegment(gs, (v))
-# define load_gs_index(v) loadsegment(gs, (v))
-#endif /* X86_32 */
-
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 872617542bbc..b45c4d27fd46 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_SET_MEMORY_H
#define _ASM_X86_SET_MEMORY_H
+#include <linux/mm.h>
#include <asm/page.h>
#include <asm-generic/set_memory.h>
@@ -52,7 +53,6 @@ int set_memory_global(unsigned long addr, int numpages);
int set_pages_array_uc(struct page **pages, int addrinarray);
int set_pages_array_wc(struct page **pages, int addrinarray);
-int set_pages_array_wt(struct page **pages, int addrinarray);
int set_pages_array_wb(struct page **pages, int addrinarray);
/*
@@ -83,57 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
extern int kernel_set_to_readonly;
-#ifdef CONFIG_X86_64
-/*
- * Prevent speculative access to the page by either unmapping
- * it (if we do not require access to any part of the page) or
- * marking it uncacheable (if we want to try to retrieve data
- * from non-poisoned lines in the page).
- */
-static inline int set_mce_nospec(unsigned long pfn, bool unmap)
-{
- unsigned long decoy_addr;
- int rc;
-
- /*
- * We would like to just call:
- * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
- * but doing that would radically increase the odds of a
- * speculative access to the poison page because we'd have
- * the virtual address of the kernel 1:1 mapping sitting
- * around in registers.
- * Instead we get tricky. We create a non-canonical address
- * that looks just like the one we want, but has bit 63 flipped.
- * This relies on set_memory_XX() properly sanitizing any __pa()
- * results with __PHYSICAL_MASK or PTE_PFN_MASK.
- */
- decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-
- if (unmap)
- rc = set_memory_np(decoy_addr, 1);
- else
- rc = set_memory_uc(decoy_addr, 1);
- if (rc)
- pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
- return rc;
-}
-#define set_mce_nospec set_mce_nospec
-
-/* Restore full speculative operation to the pfn. */
-static inline int clear_mce_nospec(unsigned long pfn)
-{
- return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
-}
-#define clear_mce_nospec clear_mce_nospec
-#else
-/*
- * Few people would run a 32-bit kernel on a machine that supports
- * recoverable errors because they have too much memory to boot 32-bit.
- */
-#endif
-
#endif /* _ASM_X86_SET_MEMORY_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index a12458a7a8d4..f37cbff7354c 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -8,6 +8,7 @@
#include <linux/linkage.h>
#include <asm/page_types.h>
+#include <asm/ibt.h>
#ifdef __i386__
@@ -49,7 +50,6 @@ extern unsigned long saved_video_mode;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
-extern unsigned long __startup_secondary_64(void);
extern void startup_64_setup_env(unsigned long physbase);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
@@ -108,29 +108,21 @@ extern unsigned long _brk_end;
void *extend_brk(size_t size, size_t align);
/*
- * Reserve space in the brk section. The name must be unique within
- * the file, and somewhat descriptive. The size is in bytes. Must be
- * used at file scope.
+ * Reserve space in the .brk section, which is a block of memory from which the
+ * caller is allowed to allocate very early (before even memblock is available)
+ * by calling extend_brk(). All allocated memory will eventually be converted
+ * to memblock. Any leftover unallocated memory will be freed.
*
- * (This uses a temp function to wrap the asm so we can pass it the
- * size parameter; otherwise we wouldn't be able to. We can't use a
- * "section" attribute on a normal variable because it always ends up
- * being @progbits, which ends up allocating space in the vmlinux
- * executable.)
+ * The size is in bytes.
*/
-#define RESERVE_BRK(name,sz) \
- static void __section(".discard.text") __used notrace \
- __brk_reservation_fn_##name##__(void) { \
- asm volatile ( \
- ".pushsection .brk_reservation,\"aw\",@nobits;" \
- ".brk." #name ":" \
- " 1:.skip %c0;" \
- " .size .brk." #name ", . - 1b;" \
- " .popsection" \
- : : "i" (sz)); \
- }
+#define RESERVE_BRK(name, size) \
+ __section(".bss..brk") __aligned(1) __used \
+ static char __brk_##name[size]
extern void probe_roms(void);
+
+void clear_bss(void);
+
#ifdef __i386__
asmlinkage void __init i386_start_kernel(void);
@@ -141,12 +133,19 @@ asmlinkage void __init x86_64_start_reservations(char *real_mode_data);
#endif /* __i386__ */
#endif /* _SETUP */
-#else
-#define RESERVE_BRK(name,sz) \
- .pushsection .brk_reservation,"aw",@nobits; \
-.brk.name: \
-1: .skip sz; \
- .size .brk.name,.-1b; \
+
+#else /* __ASSEMBLY__ */
+
+.macro __RESERVE_BRK name, size
+ .pushsection .bss..brk, "aw"
+SYM_DATA_START(__brk_\name)
+ .skip \size
+SYM_DATA_END(__brk_\name)
.popsection
+.endm
+
+#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
+
#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_SETUP_H */
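The reworked RESERVE_BRK() above reduces the reservation to a plain char array placed in .bss..brk. A minimal usage sketch, assuming a hypothetical reservation name and size (not taken from this patch), paired with extend_brk():

RESERVE_BRK(early_scratch, 65536);		/* file scope: 64 KiB in .bss..brk */

static void * __init alloc_early_scratch(size_t size)
{
	/* extend_brk() hands out pieces of the reserved .brk block very early */
	return extend_brk(size, sizeof(long));
}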
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 6acaf5af0a3d..b8357d6ecd47 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -18,20 +18,19 @@
/* SEV Information Request/Response */
#define GHCB_MSR_SEV_INFO_RESP 0x001
#define GHCB_MSR_SEV_INFO_REQ 0x002
-#define GHCB_MSR_VER_MAX_POS 48
-#define GHCB_MSR_VER_MAX_MASK 0xffff
-#define GHCB_MSR_VER_MIN_POS 32
-#define GHCB_MSR_VER_MIN_MASK 0xffff
-#define GHCB_MSR_CBIT_POS 24
-#define GHCB_MSR_CBIT_MASK 0xff
-#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \
- ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \
- (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \
- (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \
+
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \
+ /* GHCBData[63:48] */ \
+ ((((_max) & 0xffff) << 48) | \
+ /* GHCBData[47:32] */ \
+ (((_min) & 0xffff) << 32) | \
+ /* GHCBData[31:24] */ \
+ (((_cbit) & 0xff) << 24) | \
GHCB_MSR_SEV_INFO_RESP)
+
#define GHCB_MSR_INFO(v) ((v) & 0xfffUL)
-#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
-#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)
+#define GHCB_MSR_PROTO_MAX(v) (((v) >> 48) & 0xffff)
+#define GHCB_MSR_PROTO_MIN(v) (((v) >> 32) & 0xffff)
/* CPUID Request/Response */
#define GHCB_MSR_CPUID_REQ 0x004
@@ -46,30 +45,118 @@
#define GHCB_CPUID_REQ_EBX 1
#define GHCB_CPUID_REQ_ECX 2
#define GHCB_CPUID_REQ_EDX 3
-#define GHCB_CPUID_REQ(fn, reg) \
- (GHCB_MSR_CPUID_REQ | \
- (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
- (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
+#define GHCB_CPUID_REQ(fn, reg) \
+ /* GHCBData[11:0] */ \
+ (GHCB_MSR_CPUID_REQ | \
+ /* GHCBData[31:12] */ \
+ (((unsigned long)(reg) & 0x3) << 30) | \
+ /* GHCBData[63:32] */ \
+ (((unsigned long)fn) << 32))
/* AP Reset Hold */
-#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
-#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+
+/* GHCB GPA Register */
+#define GHCB_MSR_REG_GPA_REQ 0x012
+#define GHCB_MSR_REG_GPA_REQ_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)((v) & GENMASK_ULL(51, 0)) << 12) | \
+ /* GHCBData[11:0] */ \
+ GHCB_MSR_REG_GPA_REQ)
+
+#define GHCB_MSR_REG_GPA_RESP 0x013
+#define GHCB_MSR_REG_GPA_RESP_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+/*
+ * SNP Page State Change Operation
+ *
+ * GHCBData[55:52] - Page operation:
+ * 0x0001 Page assignment, Private
+ * 0x0002 Page assignment, Shared
+ */
+enum psc_op {
+ SNP_PAGE_STATE_PRIVATE = 1,
+ SNP_PAGE_STATE_SHARED,
+};
+
+#define GHCB_MSR_PSC_REQ 0x014
+#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \
+ /* GHCBData[55:52] */ \
+ (((u64)((op) & 0xf) << 52) | \
+ /* GHCBData[51:12] */ \
+ ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) | \
+ /* GHCBData[11:0] */ \
+ GHCB_MSR_PSC_REQ)
+
+#define GHCB_MSR_PSC_RESP 0x015
+#define GHCB_MSR_PSC_RESP_VAL(val) \
+ /* GHCBData[63:32] */ \
+ (((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
/* GHCB Hypervisor Feature Request/Response */
-#define GHCB_MSR_HV_FT_REQ 0x080
-#define GHCB_MSR_HV_FT_RESP 0x081
+#define GHCB_MSR_HV_FT_REQ 0x080
+#define GHCB_MSR_HV_FT_RESP 0x081
+#define GHCB_MSR_HV_FT_RESP_VAL(v) \
+ /* GHCBData[63:12] */ \
+ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
+
+#define GHCB_HV_FT_SNP BIT_ULL(0)
+#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
+
+/* SNP Page State Change NAE event */
+#define VMGEXIT_PSC_MAX_ENTRY 253
+
+struct psc_hdr {
+ u16 cur_entry;
+ u16 end_entry;
+ u32 reserved;
+} __packed;
+
+struct psc_entry {
+ u64 cur_page : 12,
+ gfn : 40,
+ operation : 4,
+ pagesize : 1,
+ reserved : 7;
+} __packed;
+
+struct snp_psc_desc {
+ struct psc_hdr hdr;
+ struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
+} __packed;
+
+/* Guest message request error code */
+#define SNP_GUEST_REQ_INVALID_LEN BIT_ULL(32)
#define GHCB_MSR_TERM_REQ 0x100
#define GHCB_MSR_TERM_REASON_SET_POS 12
#define GHCB_MSR_TERM_REASON_SET_MASK 0xf
#define GHCB_MSR_TERM_REASON_POS 16
#define GHCB_MSR_TERM_REASON_MASK 0xff
-#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \
- (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \
- ((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS))
-#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0
-#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1
+#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \
+ /* GHCBData[15:12] */ \
+ (((((u64)reason_set) & 0xf) << 12) | \
+ /* GHCBData[23:16] */ \
+ ((((u64)reason_val) & 0xff) << 16))
+
+/* Error codes from reason set 0 */
+#define SEV_TERM_SET_GEN 0
+#define GHCB_SEV_ES_GEN_REQ 0
+#define GHCB_SEV_ES_PROT_UNSUPPORTED 1
+#define GHCB_SNP_UNSUPPORTED 2
+
+/* Linux-specific reason codes (used with reason set 1) */
+#define SEV_TERM_SET_LINUX 1
+#define GHCB_TERM_REGISTER 0 /* GHCB GPA registration failure */
+#define GHCB_TERM_PSC 1 /* Page State Change failure */
+#define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */
+#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */
+#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */
+#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
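The GHCBData[...] annotations above spell out plain bit fields in the GHCB MSR protocol. A small standalone sketch (userspace C, example values only) that mirrors the GHCB_MSR_SEV_INFO() encode and the GHCB_MSR_PROTO_MAX/MIN decode, useful as a sanity check of the layout:

#include <stdint.h>
#include <stdio.h>

/* Mirrors GHCB_MSR_SEV_INFO(): [63:48]=max, [47:32]=min, [31:24]=c-bit, [11:0]=0x001 */
static uint64_t sev_info_resp(uint64_t max, uint64_t min, uint64_t cbit)
{
	return ((max & 0xffff) << 48) | ((min & 0xffff) << 32) |
	       ((cbit & 0xff) << 24) | 0x001;
}

int main(void)
{
	uint64_t v = sev_info_resp(2, 1, 51);	/* example values, not from the patch */

	printf("info=%#llx max=%llu min=%llu cbit=%llu\n",
	       (unsigned long long)(v & 0xfff),
	       (unsigned long long)((v >> 48) & 0xffff),
	       (unsigned long long)((v >> 32) & 0xffff),
	       (unsigned long long)((v >> 24) & 0xff));
	return 0;
}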
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index ec060c433589..ebc271bb6d8e 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -11,9 +11,10 @@
#include <linux/types.h>
#include <asm/insn.h>
#include <asm/sev-common.h>
+#include <asm/bootparam.h>
-#define GHCB_PROTO_OUR 0x0001UL
-#define GHCB_PROTOCOL_MAX 1ULL
+#define GHCB_PROTOCOL_MIN 1ULL
+#define GHCB_PROTOCOL_MAX 2ULL
#define GHCB_DEFAULT_USAGE 0ULL
#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); }
@@ -42,6 +43,24 @@ struct es_em_ctxt {
struct es_fault_info fi;
};
+/*
+ * AMD SEV Confidential computing blob structure. The structure is
+ * defined in OVMF UEFI firmware header:
+ * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h
+ */
+#define CC_BLOB_SEV_HDR_MAGIC 0x45444d41
+struct cc_blob_sev_info {
+ u32 magic;
+ u16 version;
+ u16 reserved;
+ u64 secrets_phys;
+ u32 secrets_len;
+ u32 rsvd1;
+ u64 cpuid_phys;
+ u32 cpuid_len;
+ u32 rsvd2;
+} __packed;
+
void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
static inline u64 lower_bits(u64 val, unsigned int bits)
@@ -53,13 +72,67 @@ static inline u64 lower_bits(u64 val, unsigned int bits)
struct real_mode_header;
enum stack_type;
-struct ghcb;
/* Early IDT entry points for #VC handler */
extern void vc_no_ghcb(void);
extern void vc_boot_ghcb(void);
extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+/* Software defined (when rFlags.CF = 1) */
+#define PVALIDATE_FAIL_NOUPDATE 255
+
+/* RMP page size */
+#define RMP_PG_SIZE_4K 0
+
+#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
+
+/* SNP Guest message request */
+struct snp_req_data {
+ unsigned long req_gpa;
+ unsigned long resp_gpa;
+ unsigned long data_gpa;
+ unsigned int data_npages;
+};
+
+struct sev_guest_platform_data {
+ u64 secrets_gpa;
+};
+
+/*
+ * The secrets page contains a 96-byte area reserved for use by the guest OS.
+ * The guest OS uses this area to save the message sequence
+ * number for each VMPCK.
+ *
+ * See the GHCB spec section Secret page layout for the format for this area.
+ */
+struct secrets_os_area {
+ u32 msg_seqno_0;
+ u32 msg_seqno_1;
+ u32 msg_seqno_2;
+ u32 msg_seqno_3;
+ u64 ap_jump_table_pa;
+ u8 rsvd[40];
+ u8 guest_usage[32];
+} __packed;
+
+#define VMPCK_KEY_LEN 32
+
+/* See the SNP spec version 0.9 for secrets page format */
+struct snp_secrets_page_layout {
+ u32 version;
+ u32 imien : 1,
+ rsvd1 : 31;
+ u32 fms;
+ u32 rsvd2;
+ u8 gosvw[16];
+ u8 vmpck0[VMPCK_KEY_LEN];
+ u8 vmpck1[VMPCK_KEY_LEN];
+ u8 vmpck2[VMPCK_KEY_LEN];
+ u8 vmpck3[VMPCK_KEY_LEN];
+ struct secrets_os_area os_area;
+ u8 rsvd3[3840];
+} __packed;
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern struct static_key_false sev_es_enable_key;
extern void __sev_es_ist_enter(struct pt_regs *regs);
@@ -82,17 +155,72 @@ static __always_inline void sev_es_nmi_complete(void)
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
-extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
- bool set_ghcb_msr,
- struct es_em_ctxt *ctxt,
- u64 exit_code, u64 exit_info_1,
- u64 exit_info_2);
+
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
+{
+ int rc;
+
+ /* "rmpadjust" mnemonic support in binutils 2.36 and newer */
+ asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t"
+ : "=a"(rc)
+ : "a"(vaddr), "c"(rmp_psize), "d"(attrs)
+ : "memory", "cc");
+
+ return rc;
+}
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
+{
+ bool no_rmpupdate;
+ int rc;
+
+ /* "pvalidate" mnemonic support in binutils 2.36 and newer */
+ asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t"
+ CC_SET(c)
+ : CC_OUT(c) (no_rmpupdate), "=a"(rc)
+ : "a"(vaddr), "c"(rmp_psize), "d"(validate)
+ : "memory", "cc");
+
+ if (no_rmpupdate)
+ return PVALIDATE_FAIL_NOUPDATE;
+
+ return rc;
+}
+void setup_ghcb(void);
+void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned int npages);
+void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned int npages);
+void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
+void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
+void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
+void snp_set_wakeup_secondary_cpu(void);
+bool snp_init(struct boot_params *bp);
+void __init __noreturn snp_abort(void);
+int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
static inline void sev_es_nmi_complete(void) { }
static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
+static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
+static inline void setup_ghcb(void) { }
+static inline void __init
+early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+static inline void __init
+early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
+static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_wakeup_secondary_cpu(void) { }
+static inline bool snp_init(struct boot_params *bp) { return false; }
+static inline void snp_abort(void) { }
+static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input,
+ unsigned long *fw_err)
+{
+ return -ENOTTY;
+}
#endif
#endif
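pvalidate() above reports failure two ways: a PVALIDATE error code in %eax, or rFlags.CF set when the RMP entry was already in the requested state. A hedged sketch of how a caller might fold the two together when validating one private 4K page (helper name is illustrative):

static int validate_private_4k_page(unsigned long vaddr)
{
	int ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true);

	/* The RMP entry did not change: the page was already validated. */
	if (ret == PVALIDATE_FAIL_NOUPDATE)
		return 0;

	return ret;	/* 0 on success, PVALIDATE failure code otherwise */
}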
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 05f3e21f01a7..eae20fa52b93 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -46,18 +46,41 @@ enum sgx_encls_function {
};
/**
+ * SGX_ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr
+ *
+ * ENCLS has its own (positive value) error codes and also generates
+ * ENCLS specific #GP and #PF faults. And the ENCLS values get munged
+ * with system error codes as everything percolates back up the stack.
+ * Unfortunately (for us), we need to precisely identify each unique
+ * error code, e.g. the action taken if EWB fails varies based on the
+ * type of fault and on the exact SGX error code, i.e. we can't simply
+ * convert all faults to -EFAULT.
+ *
+ * To make all three error types coexist, we set bit 30 to identify an
+ * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate
+ * between positive (faults and SGX error codes) and negative (system
+ * error codes) values.
+ */
+#define SGX_ENCLS_FAULT_FLAG 0x40000000
+
+/**
* enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
+ * %SGX_EPC_PAGE_CONFLICT:	Page is being written by another ENCLS function.
* %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
* been completed yet.
 * %SGX_CHILD_PRESENT:	SECS has child pages present in the EPC.
* %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
* public key does not match IA32_SGXLEPUBKEYHASH.
+ * %SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it
+ * is in the PENDING or MODIFIED state.
* %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received
*/
enum sgx_return_code {
+ SGX_EPC_PAGE_CONFLICT = 7,
SGX_NOT_TRACKED = 11,
SGX_CHILD_PRESENT = 13,
SGX_INVALID_EINITTOKEN = 16,
+ SGX_PAGE_NOT_MODIFIABLE = 20,
SGX_UNMASKED_EVENT = 128,
};
@@ -216,6 +239,9 @@ struct sgx_pageinfo {
* %SGX_PAGE_TYPE_REG: a regular page
* %SGX_PAGE_TYPE_VA: a VA page
* %SGX_PAGE_TYPE_TRIM: a page in trimmed state
+ *
+ * Make sure when making changes to this enum that its values can still fit
+ * in the bitfield within &struct sgx_encl_page
*/
enum sgx_page_type {
SGX_PAGE_TYPE_SECS,
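The SGX_ENCLS_FAULT_FLAG scheme above packs three kinds of outcome into a single int. A hedged sketch (helper name hypothetical) of telling them apart; negative kernel errors are checked before the fault flag because a negative value also has bit 30 set:

static const char *classify_encls_ret(int ret)
{
	if (ret == 0)
		return "success";
	if (ret < 0)
		return "negative kernel error code";
	if (ret & SGX_ENCLS_FAULT_FLAG)
		return "fault: the low bits hold the trap number";
	return "positive SGX error code, e.g. SGX_INVALID_EINITTOKEN";
}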
diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h
new file mode 100644
index 000000000000..c0ef921c0586
--- /dev/null
+++ b/arch/x86/include/asm/shared/io.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_IO_H
+#define _ASM_X86_SHARED_IO_H
+
+#include <linux/types.h>
+
+#define BUILDIO(bwl, bw, type) \
+static inline void __out##bwl(type value, u16 port) \
+{ \
+ asm volatile("out" #bwl " %" #bw "0, %w1" \
+ : : "a"(value), "Nd"(port)); \
+} \
+ \
+static inline type __in##bwl(u16 port) \
+{ \
+ type value; \
+ asm volatile("in" #bwl " %w1, %" #bw "0" \
+ : "=a"(value) : "Nd"(port)); \
+ return value; \
+}
+
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l, , u32)
+#undef BUILDIO
+
+#define inb __inb
+#define inw __inw
+#define inl __inl
+#define outb __outb
+#define outw __outw
+#define outl __outl
+
+#endif
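For reference, a rough by-hand expansion of BUILDIO(b, b, u8) from the new header, which is effectively what the preprocessor generates (modulo whitespace):

static inline void __outb(u8 value, u16 port)
{
	asm volatile("outb %b0, %w1" : : "a"(value), "Nd"(port));
}

static inline u8 __inb(u16 port)
{
	u8 value;

	asm volatile("inb %w1, %b0" : "=a"(value) : "Nd"(port));
	return value;
}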
diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h
new file mode 100644
index 000000000000..1e6ec10b3a15
--- /dev/null
+++ b/arch/x86/include/asm/shared/msr.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_MSR_H
+#define _ASM_X86_SHARED_MSR_H
+
+struct msr {
+ union {
+ struct {
+ u32 l;
+ u32 h;
+ };
+ u64 q;
+ };
+};
+
+#endif /* _ASM_X86_SHARED_MSR_H */
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
new file mode 100644
index 000000000000..e53f26228fbb
--- /dev/null
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_TDX_H
+#define _ASM_X86_SHARED_TDX_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#define TDX_HYPERCALL_STANDARD 0
+
+#define TDX_HCALL_HAS_OUTPUT BIT(0)
+#define TDX_HCALL_ISSUE_STI BIT(1)
+
+#define TDX_CPUID_LEAF_ID 0x21
+#define TDX_IDENT "IntelTDX "
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used by __tdx_hypercall() to pass register values to the TDCALL instruction
+ * and to collect the values it returns when requesting services from the VMM.
+ *
+ * This is a software only structure and not part of the TDX module/VMM ABI.
+ */
+struct tdx_hypercall_args {
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+};
+
+/* Used to request services from the VMM */
+u64 __tdx_hypercall(struct tdx_hypercall_args *args, unsigned long flags);
+
+/* Called from __tdx_hypercall() for unrecoverable failure */
+void __tdx_hypercall_failed(void);
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_X86_SHARED_TDX_H */
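A hedged usage sketch for struct tdx_hypercall_args and __tdx_hypercall(); the leaf number and wrapper name below are placeholders rather than values defined by this patch:

static u64 tdvmcall_sketch(u64 leaf, u64 p1)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = leaf,	/* TDVMCALL sub-function, placeholder */
		.r12 = p1,
	};

	/* r10 as returned by the VMM: 0 on success, an error code otherwise */
	return __tdx_hypercall(&args, 0);
}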
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index d17b39893b79..bab490379c65 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -19,25 +19,14 @@
#ifdef __ASSEMBLY__
-#ifdef CONFIG_X86_SMAP
-
#define ASM_CLAC \
ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
#define ASM_STAC \
ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
-#else /* CONFIG_X86_SMAP */
-
-#define ASM_CLAC
-#define ASM_STAC
-
-#endif /* CONFIG_X86_SMAP */
-
#else /* __ASSEMBLY__ */
-#ifdef CONFIG_X86_SMAP
-
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
@@ -76,19 +65,6 @@ static __always_inline void smap_restore(unsigned long flags)
#define ASM_STAC \
ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
-#else /* CONFIG_X86_SMAP */
-
-static inline void clac(void) { }
-static inline void stac(void) { }
-
-static inline unsigned long smap_save(void) { return 0; }
-static inline void smap_restore(unsigned long flags) { }
-
-#define ASM_CLAC
-#define ASM_STAC
-
-#endif /* CONFIG_X86_SMAP */
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMAP_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 81a0211a372d..a73bced40e24 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -21,16 +21,6 @@ DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id);
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
-static inline struct cpumask *cpu_llc_shared_mask(int cpu)
-{
- return per_cpu(cpu_llc_shared_map, cpu);
-}
-
-static inline struct cpumask *cpu_l2c_shared_mask(int cpu)
-{
- return per_cpu(cpu_l2c_shared_map, cpu);
-}
-
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
@@ -172,6 +162,16 @@ extern int safe_smp_processor_id(void);
# define safe_smp_processor_id() smp_processor_id()
#endif
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return per_cpu(cpu_llc_shared_map, cpu);
+}
+
+static inline struct cpumask *cpu_l2c_shared_mask(int cpu)
+{
+ return per_cpu(cpu_l2c_shared_map, cpu);
+}
+
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
static inline int wbinvd_on_all_cpus(void)
@@ -179,6 +179,11 @@ static inline int wbinvd_on_all_cpus(void)
wbinvd();
return 0;
}
+
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return (struct cpumask *)cpumask_of(0);
+}
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus;
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 6a9ccc1b2be5..64df897c0ee3 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_SPARSEMEM_H
#define _ASM_X86_SPARSEMEM_H
+#include <linux/types.h>
+
#ifdef CONFIG_SPARSEMEM
/*
* generic non-linear memory support:
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 5393babc0598..cb0386fc4dc3 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -13,7 +13,7 @@
* Takes the guest view of SPEC_CTRL MSR as a parameter and also
* the guest's version of VIRT_SPEC_CTRL, if emulated.
*/
-extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest);
/**
* x86_spec_ctrl_set_guest - Set speculation control registers for the guest
@@ -24,9 +24,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo
* Avoids writing to the MSR if the content/bits are the same
*/
static inline
-void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl)
{
- x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+ x86_virt_spec_ctrl(guest_virt_spec_ctrl, true);
}
/**
@@ -38,9 +38,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
* Avoids writing to the MSR if the content/bits are the same
*/
static inline
-void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl)
{
- x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+ x86_virt_spec_ctrl(guest_virt_spec_ctrl, false);
}
/* AMD specific Speculative Store Bypass MSR data */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 68c257a3de0d..35f709f619fb 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -184,14 +184,15 @@ static inline void wbinvd(void)
native_wbinvd();
}
-#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int selector)
{
+#ifdef CONFIG_X86_64
native_load_gs_index(selector);
-}
-
+#else
+ loadsegment(gs, selector);
#endif
+}
#endif /* CONFIG_PARAVIRT_XXL */
@@ -294,6 +295,15 @@ static inline int enqcmds(void __iomem *dst, const void *src)
return 0;
}
+static inline void tile_release(void)
+{
+ /*
+ * Instruction opcode for TILERELEASE; supported in binutils
+ * version >= 2.36.
+ */
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0");
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 39ebe0511869..343b722ccaf2 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -21,13 +21,24 @@
* relative displacement across sections.
*/
+/*
+ * The trampoline is 8 bytes and of the general form:
+ *
+ * jmp.d32 \func
+ * ud1 %esp, %ecx
+ *
+ * That trailing #UD both provides a speculation stop and serves as a unique
+ * 3-byte signature identifying static call trampolines. Also see tramp_ud[]
+ * and __static_call_fixup().
+ */
#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
asm(".pushsection .static_call.text, \"ax\" \n" \
".align 4 \n" \
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
+ ANNOTATE_NOENDBR \
insns " \n" \
- ".byte 0x53, 0x43, 0x54 \n" \
+ ".byte 0x0f, 0xb9, 0xcc \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")
@@ -35,9 +46,16 @@
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+#ifdef CONFIG_RETHUNK
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+#else
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
- __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+#endif
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
+ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
#define ARCH_ADD_TRAMP_KEY(name) \
asm(".pushsection .static_call_tramp_key, \"a\" \n" \
@@ -45,4 +63,6 @@
".long " STATIC_CALL_KEY_STR(name) " - . \n" \
".popsection \n")
+extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
+
#endif /* _ASM_STATIC_CALL_H */
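The trampolines above sit behind the generic static_call() API. A hedged caller-side sketch (names are illustrative); static_call_update() is what ends up repatching the ud1-terminated trampoline defined here:

#include <linux/static_call.h>

static int default_handler(int x)
{
	return x + 1;
}

DEFINE_STATIC_CALL(my_handler, default_handler);	/* emits the trampoline */

static int run_handler(int x)
{
	return static_call(my_handler)(x);	/* direct call through the trampoline */
}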
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index f74362b05619..32c0d981a82a 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -146,42 +146,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
extern void *memcpy(void *, const void *, size_t);
#ifndef CONFIG_FORTIFY_SOURCE
-#ifdef CONFIG_X86_USE_3DNOW
-
-#include <asm/mmx.h>
-
-/*
- * This CPU favours 3DNow strongly (eg AMD Athlon)
- */
-
-static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
-{
- if (len < 512)
- return __constant_memcpy(to, from, len);
- return _mmx_memcpy(to, from, len);
-}
-
-static inline void *__memcpy3d(void *to, const void *from, size_t len)
-{
- if (len < 512)
- return __memcpy(to, from, len);
- return _mmx_memcpy(to, from, len);
-}
-
-#define memcpy(t, f, n) \
- (__builtin_constant_p((n)) \
- ? __constant_memcpy3d((t), (f), (n)) \
- : __memcpy3d((t), (f), (n)))
-
-#else
-
-/*
- * No 3D Now!
- */
#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
-#endif
#endif /* !CONFIG_FORTIFY_SOURCE */
#define __HAVE_ARCH_MEMMOVE
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 6e450827f677..888731ccf1f6 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -10,12 +10,27 @@
/* Even with __builtin_ the compiler may decide to use the out of line
function. */
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
+#include <linux/kmsan_string.h>
+#endif
+
#define __HAVE_ARCH_MEMCPY 1
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
+#undef memcpy
+#define memcpy __msan_memcpy
+#else
extern void *memcpy(void *to, const void *from, size_t len);
+#endif
extern void *__memcpy(void *to, const void *from, size_t len);
#define __HAVE_ARCH_MEMSET
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
+extern void *__msan_memset(void *s, int c, size_t n);
+#undef memset
+#define memset __msan_memset
+#else
void *memset(void *s, int c, size_t n);
+#endif
void *__memset(void *s, int c, size_t n);
#define __HAVE_ARCH_MEMSET16
@@ -55,7 +70,13 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
}
#define __HAVE_ARCH_MEMMOVE
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
+#undef memmove
+void *__msan_memmove(void *dest, const void *src, size_t len);
+#define memmove __msan_memmove
+#else
void *memmove(void *dest, const void *src, size_t count);
+#endif
void *__memmove(void *dest, const void *src, size_t count);
int memcmp(const void *cs, const void *ct, size_t count);
@@ -64,8 +85,7 @@ char *strcpy(char *dest, const char *src);
char *strcat(char *dest, const char *src);
int strcmp(const char *cs, const char *ct);
-#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
-
+#if (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
/*
 * For files that are not instrumented (e.g. mm/slub.c) we
 * should use the non-instrumented versions of the mem* functions.
@@ -73,7 +93,9 @@ int strcmp(const char *cs, const char *ct);
#undef memcpy
#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#undef memmove
#define memmove(dst, src, len) __memmove(dst, src, len)
+#undef memset
#define memset(s, c, n) __memset(s, c, n)
#ifndef __NO_FORTIFY
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 7b132d0312eb..a800abb1a992 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -19,7 +19,6 @@ struct saved_context {
u16 gs;
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
- bool misc_enable_saved;
struct saved_msrs saved_msrs;
struct desc_ptr gdt_desc;
struct desc_ptr idt;
@@ -28,6 +27,7 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ bool misc_enable_saved;
} __attribute__((packed));
/* routines for saving/restoring kernel state */
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 35bb35d28733..54df06687d83 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -14,9 +14,13 @@
* Image of the saved processor state, used by the low level ACPI suspend to
* RAM code and by the low level hibernation code.
*
- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
- * still work as required.
+ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
+ * and make sure that __save/__restore_processor_state(), defined in
+ * arch/x86/power/cpu.c, still work as required.
+ *
+ * Because the structure is packed, make sure to avoid unaligned members. This
+ * matters for performance, but also because tools like kmemleak only search
+ * for pointers that are aligned.
*/
struct saved_context {
struct pt_regs regs;
@@ -36,7 +40,6 @@ struct saved_context {
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
- bool misc_enable_saved;
struct saved_msrs saved_msrs;
unsigned long efer;
u16 gdt_pad; /* Unused */
@@ -48,6 +51,7 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ bool misc_enable_saved;
} __attribute__((packed));
#define loaddebug(thread,register) \
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index b00dbc5fac2b..0361626841bc 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -195,6 +195,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+#define X2APIC_MODE_SHIFT 30
+#define X2APIC_MODE_MASK (1 << X2APIC_MODE_SHIFT)
+
#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
@@ -220,6 +223,57 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2)
+
+#define SVM_TSC_RATIO_RSVD 0xffffff0000000000ULL
+#define SVM_TSC_RATIO_MIN 0x0000000000000001ULL
+#define SVM_TSC_RATIO_MAX 0x000000ffffffffffULL
+#define SVM_TSC_RATIO_DEFAULT 0x0100000000ULL
+
+
+/* AVIC */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL)
+
+#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
+
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+
+enum avic_ipi_failure_cause {
+ AVIC_IPI_FAILURE_INVALID_INT_TYPE,
+ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+ AVIC_IPI_FAILURE_INVALID_TARGET,
+ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+};
+
+#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(9, 0)
+
+/*
+ * For AVIC, the max index allowed for physical APIC ID
+ * table is 0xff (255).
+ */
+#define AVIC_MAX_PHYSICAL_ID 0XFEULL
+
+/*
+ * For x2AVIC, the max index allowed for physical APIC ID
+ * table is 0x1ff (511).
+ */
+#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL
+
+#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
+
struct vmcb_seg {
u16 selector;
u16 attrib;
@@ -227,6 +281,7 @@ struct vmcb_seg {
u64 base;
} __packed;
+/* Save area definition for legacy and SEV-MEM guests */
struct vmcb_save_area {
struct vmcb_seg es;
struct vmcb_seg cs;
@@ -238,12 +293,12 @@ struct vmcb_save_area {
struct vmcb_seg ldtr;
struct vmcb_seg idtr;
struct vmcb_seg tr;
- u8 reserved_1[43];
+ u8 reserved_1[42];
+ u8 vmpl;
u8 cpl;
u8 reserved_2[4];
u64 efer;
- u8 reserved_3[104];
- u64 xss; /* Valid for SEV-ES only */
+ u8 reserved_3[112];
u64 cr4;
u64 cr3;
u64 cr0;
@@ -253,7 +308,9 @@ struct vmcb_save_area {
u64 rip;
u8 reserved_4[88];
u64 rsp;
- u8 reserved_5[24];
+ u64 s_cet;
+ u64 ssp;
+ u64 isst_addr;
u64 rax;
u64 star;
u64 lstar;
@@ -264,29 +321,145 @@ struct vmcb_save_area {
u64 sysenter_esp;
u64 sysenter_eip;
u64 cr2;
- u8 reserved_6[32];
+ u8 reserved_5[32];
u64 g_pat;
u64 dbgctl;
u64 br_from;
u64 br_to;
u64 last_excp_from;
u64 last_excp_to;
-
- /*
- * The following part of the save area is valid only for
- * SEV-ES guests when referenced through the GHCB or for
- * saving to the host save area.
- */
- u8 reserved_7[72];
+ u8 reserved_6[72];
u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
- u8 reserved_7b[4];
+} __packed;
+
+/* Save area definition for SEV-ES and SEV-SNP guests */
+struct sev_es_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ u64 vmpl0_ssp;
+ u64 vmpl1_ssp;
+ u64 vmpl2_ssp;
+ u64 vmpl3_ssp;
+ u64 u_cet;
+ u8 reserved_1[2];
+ u8 vmpl;
+ u8 cpl;
+ u8 reserved_2[4];
+ u64 efer;
+ u8 reserved_3[104];
+ u64 xss;
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u64 dr0;
+ u64 dr1;
+ u64 dr2;
+ u64 dr3;
+ u64 dr0_addr_mask;
+ u64 dr1_addr_mask;
+ u64 dr2_addr_mask;
+ u64 dr3_addr_mask;
+ u8 reserved_4[24];
+ u64 rsp;
+ u64 s_cet;
+ u64 ssp;
+ u64 isst_addr;
+ u64 rax;
+ u64 star;
+ u64 lstar;
+ u64 cstar;
+ u64 sfmask;
+ u64 kernel_gs_base;
+ u64 sysenter_cs;
+ u64 sysenter_esp;
+ u64 sysenter_eip;
+ u64 cr2;
+ u8 reserved_5[32];
+ u64 g_pat;
+ u64 dbgctl;
+ u64 br_from;
+ u64 br_to;
+ u64 last_excp_from;
+ u64 last_excp_to;
+ u8 reserved_7[80];
u32 pkru;
- u8 reserved_7a[20];
- u64 reserved_8; /* rax already available at 0x01f8 */
+ u8 reserved_8[20];
+ u64 reserved_9; /* rax already available at 0x01f8 */
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 reserved_10; /* rsp already available at 0x01d8 */
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u8 reserved_11[16];
+ u64 guest_exit_info_1;
+ u64 guest_exit_info_2;
+ u64 guest_exit_int_info;
+ u64 guest_nrip;
+ u64 sev_features;
+ u64 vintr_ctrl;
+ u64 guest_exit_code;
+ u64 virtual_tom;
+ u64 tlb_id;
+ u64 pcpu_id;
+ u64 event_inj;
+ u64 xcr0;
+ u8 reserved_12[16];
+
+ /* Floating point area */
+ u64 x87_dp;
+ u32 mxcsr;
+ u16 x87_ftw;
+ u16 x87_fsw;
+ u16 x87_fcw;
+ u16 x87_fop;
+ u16 x87_ds;
+ u16 x87_cs;
+ u64 x87_rip;
+ u8 fpreg_x87[80];
+ u8 fpreg_xmm[256];
+ u8 fpreg_ymm[256];
+} __packed;
+
+struct ghcb_save_area {
+ u8 reserved_1[203];
+ u8 cpl;
+ u8 reserved_2[116];
+ u64 xss;
+ u8 reserved_3[24];
+ u64 dr7;
+ u8 reserved_4[16];
+ u64 rip;
+ u8 reserved_5[88];
+ u64 rsp;
+ u8 reserved_6[24];
+ u64 rax;
+ u8 reserved_7[264];
u64 rcx;
u64 rdx;
u64 rbx;
- u64 reserved_9; /* rsp already available at 0x01d8 */
+ u8 reserved_8[8];
u64 rbp;
u64 rsi;
u64 rdi;
@@ -298,22 +471,24 @@ struct vmcb_save_area {
u64 r13;
u64 r14;
u64 r15;
- u8 reserved_10[16];
+ u8 reserved_9[16];
u64 sw_exit_code;
u64 sw_exit_info_1;
u64 sw_exit_info_2;
u64 sw_scratch;
- u8 reserved_11[56];
+ u8 reserved_10[56];
u64 xcr0;
u8 valid_bitmap[16];
u64 x87_state_gpa;
} __packed;
+#define GHCB_SHARED_BUF_SIZE 2032
+
struct ghcb {
- struct vmcb_save_area save;
- u8 reserved_save[2048 - sizeof(struct vmcb_save_area)];
+ struct ghcb_save_area save;
+ u8 reserved_save[2048 - sizeof(struct ghcb_save_area)];
- u8 shared_buffer[2032];
+ u8 shared_buffer[GHCB_SHARED_BUF_SIZE];
u8 reserved_1[10];
u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */
@@ -321,13 +496,17 @@ struct ghcb {
} __packed;
-#define EXPECTED_VMCB_SAVE_AREA_SIZE 1032
+#define EXPECTED_VMCB_SAVE_AREA_SIZE 740
+#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
+#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648
#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
#define EXPECTED_GHCB_SIZE PAGE_SIZE
static inline void __unused_size_checks(void)
{
BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
}
@@ -397,26 +576,26 @@ struct vmcb {
/* GHCB Accessor functions */
#define GHCB_BITMAP_IDX(field) \
- (offsetof(struct vmcb_save_area, field) / sizeof(u64))
+ (offsetof(struct ghcb_save_area, field) / sizeof(u64))
#define DEFINE_GHCB_ACCESSORS(field) \
- static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \
+ static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \
{ \
return test_bit(GHCB_BITMAP_IDX(field), \
(unsigned long *)&ghcb->save.valid_bitmap); \
} \
\
- static inline u64 ghcb_get_##field(struct ghcb *ghcb) \
+ static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb) \
{ \
return ghcb->save.field; \
} \
\
- static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \
+ static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \
{ \
return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \
} \
\
- static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \
+ static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \
{ \
__set_bit(GHCB_BITMAP_IDX(field), \
(unsigned long *)&ghcb->save.valid_bitmap); \
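DEFINE_GHCB_ACCESSORS() above generates a validity-bitmap-aware getter/setter per GHCB field. A hedged sketch of what, say, DEFINE_GHCB_ACCESSORS(rax) gives a caller:

static void ghcb_rax_example(struct ghcb *ghcb)
{
	/* Writes save.rax and marks the field valid in valid_bitmap. */
	ghcb_set_rax(ghcb, 0x1234);

	if (ghcb_rax_is_valid(ghcb))
		pr_debug("rax=%llx\n", ghcb_get_rax(ghcb));
}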
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
deleted file mode 100644
index ff6c92eff035..000000000000
--- a/arch/x86/include/asm/swiotlb.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_SWIOTLB_H
-#define _ASM_X86_SWIOTLB_H
-
-#include <linux/swiotlb.h>
-
-#ifdef CONFIG_SWIOTLB
-extern int swiotlb;
-extern int __init pci_swiotlb_detect_override(void);
-extern int __init pci_swiotlb_detect_4gb(void);
-extern void __init pci_swiotlb_init(void);
-extern void __init pci_swiotlb_late_init(void);
-#else
-#define swiotlb 0
-static inline int pci_swiotlb_detect_override(void)
-{
- return 0;
-}
-static inline int pci_swiotlb_detect_4gb(void)
-{
- return 0;
-}
-static inline void pci_swiotlb_init(void)
-{
-}
-static inline void pci_swiotlb_late_init(void)
-{
-}
-#endif
-#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index b5f0d2ff47e4..c08eb0fdd11f 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -78,13 +78,13 @@ static inline void update_task_stack(struct task_struct *task)
}
static inline void kthread_frame_init(struct inactive_task_frame *frame,
- unsigned long fun, unsigned long arg)
+ int (*fun)(void *), void *arg)
{
- frame->bx = fun;
+ frame->bx = (unsigned long)fun;
#ifdef CONFIG_X86_32
- frame->di = arg;
+ frame->di = (unsigned long)arg;
#else
- frame->r12 = arg;
+ frame->r12 = (unsigned long)arg;
#endif
}
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 6a2827d0681f..fd2669b1cb2d 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -6,7 +6,7 @@
#ifndef _ASM_X86_SYSCALL_WRAPPER_H
#define _ASM_X86_SYSCALL_WRAPPER_H
-struct pt_regs;
+#include <asm/ptrace.h>
extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
@@ -159,7 +159,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#endif /* CONFIG_IA32_EMULATION */
-#ifdef CONFIG_X86_X32
+#ifdef CONFIG_X86_X32_ABI
/*
* For the x32 ABI, we need to create a stub for compat_sys_*() which is aware
* of the x86-64-style parameter ordering of x32 syscalls. The syscalls common
@@ -177,12 +177,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
#define __X32_COMPAT_SYS_NI(name) \
__SYS_NI(x64, compat_sys_##name)
-#else /* CONFIG_X86_X32 */
+#else /* CONFIG_X86_X32_ABI */
#define __X32_COMPAT_SYS_STUB0(name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
#define __X32_COMPAT_COND_SYSCALL(name)
#define __X32_COMPAT_SYS_NI(name)
-#endif /* CONFIG_X86_X32 */
+#endif /* CONFIG_X86_X32_ABI */
#ifdef CONFIG_COMPAT
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
new file mode 100644
index 000000000000..020c81a7c729
--- /dev/null
+++ b/arch/x86/include/asm/tdx.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022 Intel Corporation */
+#ifndef _ASM_X86_TDX_H
+#define _ASM_X86_TDX_H
+
+#include <linux/init.h>
+#include <linux/bits.h>
+#include <asm/ptrace.h>
+#include <asm/shared/tdx.h>
+
+/*
+ * SW-defined error codes.
+ *
+ * Bits 47:40 == 0xFF indicate the Reserved status code class, which is never
+ * used by the TDX module.
+ */
+#define TDX_ERROR _BITUL(63)
+#define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40))
+#define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _UL(0xFFFF0000))
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used to gather the output registers values of the TDCALL and SEAMCALL
+ * instructions when requesting services from the TDX module.
+ *
+ * This is a software only structure and not part of the TDX module/VMM ABI.
+ */
+struct tdx_module_output {
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+};
+
+/*
+ * Used by the #VE exception handler to gather the #VE exception
+ * info from the TDX module. This is a software only structure
+ * and not part of the TDX module/VMM ABI.
+ */
+struct ve_info {
+ u64 exit_reason;
+ u64 exit_qual;
+ /* Guest Linear (virtual) Address */
+ u64 gla;
+ /* Guest Physical Address */
+ u64 gpa;
+ u32 instr_len;
+ u32 instr_info;
+};
+
+#ifdef CONFIG_INTEL_TDX_GUEST
+
+void __init tdx_early_init(void);
+
+/* Used to communicate with the TDX module */
+u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+ struct tdx_module_output *out);
+
+void tdx_get_ve_info(struct ve_info *ve);
+
+bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
+
+void tdx_safe_halt(void);
+
+bool tdx_early_handle_ve(struct pt_regs *regs);
+
+#else
+
+static inline void tdx_early_init(void) { };
+static inline void tdx_safe_halt(void) { };
+
+static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
+
+#endif /* CONFIG_INTEL_TDX_GUEST */
+
+#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_INTEL_TDX_GUEST)
+long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
+ unsigned long p3, unsigned long p4);
+#else
+static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_X86_TDX_H */
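A hedged sketch of how struct tdx_module_output pairs with __tdx_module_call(); the leaf argument is a placeholder, not a value defined in this header:

static u64 tdcall_sketch(u64 leaf, struct tdx_module_output *out)
{
	/* Returns the TDX module status word; output registers land in *out. */
	return __tdx_module_call(leaf, 0, 0, 0, 0, out);
}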
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index b7421780e4e9..1cc15528ce29 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -44,6 +44,8 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
+extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+extern void *text_poke_set(void *addr, int c, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
@@ -96,24 +98,40 @@ union text_poke_insn {
};
static __always_inline
-void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
+void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size)
{
- static union text_poke_insn insn; /* per instance */
- int size = text_opcode_size(opcode);
+ union text_poke_insn *insn = buf;
+
+ BUG_ON(size < text_opcode_size(opcode));
+
+ /*
+ * Hide the addresses to avoid the compiler folding in constants when
+ * referencing code, these can mess up annotations like
+ * ANNOTATE_NOENDBR.
+ */
+ OPTIMIZER_HIDE_VAR(insn);
+ OPTIMIZER_HIDE_VAR(addr);
+ OPTIMIZER_HIDE_VAR(dest);
- insn.opcode = opcode;
+ insn->opcode = opcode;
if (size > 1) {
- insn.disp = (long)dest - (long)(addr + size);
+ insn->disp = (long)dest - (long)(addr + size);
if (size == 2) {
/*
- * Ensure that for JMP9 the displacement
+ * Ensure that for JMP8 the displacement
* actually fits the signed byte.
*/
- BUG_ON((insn.disp >> 31) != (insn.disp >> 7));
+ BUG_ON((insn->disp >> 31) != (insn->disp >> 7));
}
}
+}
+static __always_inline
+void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
+{
+ static union text_poke_insn insn; /* per instance */
+ __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode));
return &insn.text;
}
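A hedged sketch of the text_gen_insn() + text_poke_bp() pairing enabled above; the call-site variables are illustrative:

static void repoint_call_site(void *site, void *new_target)
{
	const void *insn = text_gen_insn(CALL_INSN_OPCODE, site, new_target);

	/* Patch the 5-byte call atomically via the int3 breakpoint machinery. */
	text_poke_bp(site, insn, CALL_INSN_SIZE, NULL);
}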
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ebec69c35e95..f0cb881c1d69 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,7 +92,6 @@ struct thread_info {
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
-#define TIF_SLD 18 /* Restore split lock detection on context switch */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -116,7 +115,6 @@ struct thread_info {
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_SLD (1 << TIF_SLD)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
@@ -128,7 +126,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
- _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
+ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
/*
* Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index a4a8b1b16c0c..956e4145311b 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -5,6 +5,15 @@
#include <asm/processor.h>
#include <asm/tsc.h>
+static inline unsigned long random_get_entropy(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_TSC) &&
+ !cpu_feature_enabled(X86_FEATURE_TSC))
+ return random_get_entropy_fallback();
+ return rdtsc();
+}
+#define random_get_entropy random_get_entropy
+
/* Assume we use the PIT time source for the clock tick */
#define CLOCK_TICK_RATE PIT_TICK_RATE
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 1bfe979bb9bc..580636cdc257 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -2,9 +2,6 @@
#ifndef _ASM_X86_TLB_H
#define _ASM_X86_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index b587a9ee9cb2..cda3118f3b27 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -16,6 +16,7 @@
void __flush_tlb_all(void);
#define TLB_FLUSH_ALL -1UL
+#define TLB_GENERATION_INVALID 0
void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);
@@ -259,6 +260,108 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+static inline bool pte_flags_need_flush(unsigned long oldflags,
+ unsigned long newflags,
+ bool ignore_access)
+{
+ /*
+ * Flags that require a flush when cleared but not when they are set.
+ * Only include flags that would not trigger spurious page-faults.
+ * Non-present entries are not cached. Hardware would set the
+ * dirty/access bit if needed without a fault.
+ */
+ const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
+ _PAGE_ACCESSED;
+ const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
+ _PAGE_SOFTW3 | _PAGE_SOFTW4;
+ const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
+ _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
+ _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
+ _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
+ unsigned long diff = oldflags ^ newflags;
+
+ BUILD_BUG_ON(flush_on_clear & software_flags);
+ BUILD_BUG_ON(flush_on_clear & flush_on_change);
+ BUILD_BUG_ON(flush_on_change & software_flags);
+
+ /* Ignore software flags */
+ diff &= ~software_flags;
+
+ if (ignore_access)
+ diff &= ~_PAGE_ACCESSED;
+
+ /*
+	 * Were any of the 'flush_on_clear' flags cleared going from
+	 * 'oldflags' to 'newflags'?
+ */
+ if (diff & oldflags & flush_on_clear)
+ return true;
+
+ /* Flush on modified flags. */
+ if (diff & flush_on_change)
+ return true;
+
+ /* Ensure there are no flags that were left behind */
+ if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+ (diff & ~(flush_on_clear | software_flags | flush_on_change))) {
+ VM_WARN_ON_ONCE(1);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * pte_needs_flush() checks whether permissions were demoted and require a
+ * flush. It should only be used for userspace PTEs.
+ */
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+ /* !PRESENT -> * ; no need for flush */
+ if (!(pte_flags(oldpte) & _PAGE_PRESENT))
+ return false;
+
+ /* PFN changed ; needs flush */
+ if (pte_pfn(oldpte) != pte_pfn(newpte))
+ return true;
+
+ /*
+ * check PTE flags; ignore access-bit; see comment in
+ * ptep_clear_flush_young().
+ */
+ return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte),
+ true);
+}
+#define pte_needs_flush pte_needs_flush
+
+/*
+ * huge_pmd_needs_flush() checks whether permissions were demoted and require a
+ * flush. It should only be used for userspace huge PMDs.
+ */
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+ /* !PRESENT -> * ; no need for flush */
+ if (!(pmd_flags(oldpmd) & _PAGE_PRESENT))
+ return false;
+
+ /* PFN changed ; needs flush */
+ if (pmd_pfn(oldpmd) != pmd_pfn(newpmd))
+ return true;
+
+ /*
+ * check PMD flags; do not ignore access-bit; see
+ * pmdp_clear_flush_young().
+ */
+ return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd),
+ false);
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
#endif /* !MODULE */
+static inline void __native_tlb_flush_global(unsigned long cr4)
+{
+ native_write_cr4(cr4 ^ X86_CR4_PGE);
+ native_write_cr4(cr4);
+}
#endif /* _ASM_X86_TLBFLUSH_H */
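A hedged sketch of the intended pte_needs_flush() call pattern on a permission-change path; the mmu_gather plumbing is assumed and not part of this patch:

static void change_pte_prot(struct mmu_gather *tlb, unsigned long addr,
			    pte_t oldpte, pte_t newpte)
{
	/* Only pay for a TLB flush when the old, present PTE demands one. */
	if (pte_needs_flush(oldpte, newpte))
		tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
}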
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index cc164777e661..458c891a8273 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -110,6 +110,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id)
#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
+#define topology_ppin(cpu) (cpu_data(cpu).ppin)
extern unsigned int __max_die_per_package;
@@ -211,19 +212,19 @@ static inline long arch_scale_freq_capacity(int cpu)
}
#define arch_scale_freq_capacity arch_scale_freq_capacity
-extern void arch_scale_freq_tick(void);
-#define arch_scale_freq_tick arch_scale_freq_tick
-
extern void arch_set_max_freq_ratio(bool turbo_disabled);
+extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
#else
-static inline void arch_set_max_freq_ratio(bool turbo_disabled)
-{
-}
+static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
+static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
#endif
+extern void arch_scale_freq_tick(void);
+#define arch_scale_freq_tick arch_scale_freq_tick
+
#ifdef CONFIG_ACPI_CPPC_LIB
void init_freq_invariance_cppc(void);
-#define init_freq_invariance_cppc init_freq_invariance_cppc
+#define arch_init_invariance_cppc init_freq_invariance_cppc
#endif
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 6221be7cafc3..47ecfff2c83d 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,11 +13,13 @@
#ifdef CONFIG_X86_64
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
+struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
void __init trap_init(void);
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif
+extern bool ibt_selftest(void);
+
#ifdef CONFIG_X86_F00F_BUG
/* For handling the FOOF bug */
void handle_invalid_op(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 01a300a9700b..fbdc3d951494 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -20,13 +20,12 @@ extern void disable_TSC(void);
static inline cycles_t get_cycles(void)
{
-#ifndef CONFIG_X86_TSC
- if (!boot_cpu_has(X86_FEATURE_TSC))
+ if (!IS_ENABLED(CONFIG_X86_TSC) &&
+ !cpu_feature_enabled(X86_FEATURE_TSC))
return 0;
-#endif
-
return rdtsc();
}
+#define get_cycles get_cycles
extern struct system_counterval_t convert_art_to_tsc(u64 art);
extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 33a68407def3..1cc756eafa44 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -5,6 +5,7 @@
* User space memory access functions
*/
#include <linux/compiler.h>
+#include <linux/instrumented.h>
#include <linux/kasan-checks.h>
#include <linux/string.h>
#include <asm/asm.h>
@@ -12,35 +13,6 @@
#include <asm/smap.h>
#include <asm/extable.h>
-/*
- * Test whether a block of memory is a valid user space address.
- * Returns 0 if the range is valid, nonzero otherwise.
- */
-static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
-{
- /*
- * If we have used "sizeof()" for the size,
- * we know it won't overflow the limit (but
- * it might overflow the 'addr', so it's
- * important to subtract the size from the
- * limit, not add it to the address).
- */
- if (__builtin_constant_p(size))
- return unlikely(addr > limit - size);
-
- /* Arbitrary sizes? Be careful about overflow */
- addr += size;
- if (unlikely(addr < size))
- return true;
- return unlikely(addr > limit);
-}
-
-#define __range_not_ok(addr, size, limit) \
-({ \
- __chk_user_ptr(addr); \
- __chk_range_not_ok((unsigned long __force)(addr), size, limit); \
-})
-
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline bool pagefault_disabled(void);
# define WARN_ON_IN_IRQ() \
@@ -69,9 +41,11 @@ static inline bool pagefault_disabled(void);
#define access_ok(addr, size) \
({ \
WARN_ON_IN_IRQ(); \
- likely(!__range_not_ok(addr, size, TASK_SIZE_MAX)); \
+ likely(__access_ok(addr, size)); \
})
+#include <asm-generic/access_ok.h>
+
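For reference, the generic helper included above performs the same overflow-safe range check that the removed __chk_range_not_ok() open-coded. The following is a paraphrased sketch only; the exact contents of asm-generic/access_ok.h may differ:

static inline int __access_ok(const void __user *ptr, unsigned long size)
{
	unsigned long limit = TASK_SIZE_MAX;
	unsigned long addr = (unsigned long)ptr;

	/* Subtract from the limit, never add to the address, to avoid overflow. */
	return (size <= limit) && (addr <= (limit - size));
}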
extern int __get_user_1(void);
extern int __get_user_2(void);
extern int __get_user_4(void);
@@ -130,6 +104,7 @@ extern int __get_user_bad(void);
: "=a" (__ret_gu), "=r" (__val_gu), \
ASM_CALL_CONSTRAINT \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
+ instrument_get_user(__val_gu); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
})
@@ -219,9 +194,11 @@ extern void __put_user_nocheck_8(void);
int __ret_pu; \
void __user *__ptr_pu; \
register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \
- __chk_user_ptr(ptr); \
- __ptr_pu = (ptr); \
- __val_pu = (x); \
+ __typeof__(*(ptr)) __x = (x); /* eval x once */ \
+ __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \
+ __chk_user_ptr(__ptr); \
+ __ptr_pu = __ptr; \
+ __val_pu = __x; \
asm volatile("call __" #fn "_%P[size]" \
: "=c" (__ret_pu), \
ASM_CALL_CONSTRAINT \
@@ -229,6 +206,7 @@ extern void __put_user_nocheck_8(void);
"r" (__val_pu), \
[size] "i" (sizeof(*(ptr))) \
:"ebx"); \
+ instrument_put_user(__x, __ptr, sizeof(*(ptr))); \
__builtin_expect(__ret_pu, 0); \
})
@@ -275,23 +253,26 @@ extern void __put_user_nocheck_8(void);
#define __put_user_size(x, ptr, size, label) \
do { \
- __chk_user_ptr(ptr); \
+ __typeof__(*(ptr)) __x = (x); /* eval x once */ \
+ __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \
+ __chk_user_ptr(__ptr); \
switch (size) { \
case 1: \
- __put_user_goto(x, ptr, "b", "iq", label); \
+ __put_user_goto(__x, __ptr, "b", "iq", label); \
break; \
case 2: \
- __put_user_goto(x, ptr, "w", "ir", label); \
+ __put_user_goto(__x, __ptr, "w", "ir", label); \
break; \
case 4: \
- __put_user_goto(x, ptr, "l", "ir", label); \
+ __put_user_goto(__x, __ptr, "l", "ir", label); \
break; \
case 8: \
- __put_user_goto_u64(x, ptr, label); \
+ __put_user_goto_u64(__x, __ptr, label); \
break; \
default: \
__put_user_bad(); \
} \
+ instrument_put_user(__x, __ptr, size); \
} while (0)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
@@ -314,11 +295,12 @@ do { \
do { \
__chk_user_ptr(ptr); \
switch (size) { \
- unsigned char x_u8__; \
- case 1: \
+ case 1: { \
+ unsigned char x_u8__; \
__get_user_asm(x_u8__, ptr, "b", "=q", label); \
(x) = x_u8__; \
break; \
+ } \
case 2: \
__get_user_asm(x, ptr, "w", "=r", label); \
break; \
@@ -331,6 +313,7 @@ do { \
default: \
(x) = __get_user_bad(); \
} \
+ instrument_get_user(x); \
} while (0)
#define __get_user_asm(x, addr, itype, ltype, label) \
@@ -351,24 +334,22 @@ do { \
"1: movl %[lowbits],%%eax\n" \
"2: movl %[highbits],%%edx\n" \
"3:\n" \
- ".section .fixup,\"ax\"\n" \
- "4: mov %[efault],%[errout]\n" \
- " xorl %%eax,%%eax\n" \
- " xorl %%edx,%%edx\n" \
- " jmp 3b\n" \
- ".previous\n" \
- _ASM_EXTABLE_UA(1b, 4b) \
- _ASM_EXTABLE_UA(2b, 4b) \
+ _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG | \
+ EX_FLAG_CLEAR_AX_DX, \
+ %[errout]) \
+ _ASM_EXTABLE_TYPE_REG(2b, 3b, EX_TYPE_EFAULT_REG | \
+ EX_FLAG_CLEAR_AX_DX, \
+ %[errout]) \
: [errout] "=r" (retval), \
[output] "=&A"(x) \
: [lowbits] "m" (__m(__ptr)), \
[highbits] "m" __m(((u32 __user *)(__ptr)) + 1), \
- [efault] "i" (-EFAULT), "0" (retval)); \
+ "0" (retval)); \
})
#else
#define __get_user_asm_u64(x, ptr, retval) \
- __get_user_asm(x, ptr, retval, "q", "=r")
+ __get_user_asm(x, ptr, retval, "q")
#endif
#define __get_user_size(x, ptr, size, retval) \
@@ -379,14 +360,14 @@ do { \
__chk_user_ptr(ptr); \
switch (size) { \
case 1: \
- __get_user_asm(x_u8__, ptr, retval, "b", "=q"); \
+ __get_user_asm(x_u8__, ptr, retval, "b"); \
(x) = x_u8__; \
break; \
case 2: \
- __get_user_asm(x, ptr, retval, "w", "=r"); \
+ __get_user_asm(x, ptr, retval, "w"); \
break; \
case 4: \
- __get_user_asm(x, ptr, retval, "l", "=r"); \
+ __get_user_asm(x, ptr, retval, "l"); \
break; \
case 8: \
__get_user_asm_u64(x, ptr, retval); \
@@ -396,23 +377,118 @@ do { \
} \
} while (0)
-#define __get_user_asm(x, addr, err, itype, ltype) \
+#define __get_user_asm(x, addr, err, itype) \
asm volatile("\n" \
"1: mov"itype" %[umem],%[output]\n" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: mov %[efault],%[errout]\n" \
- " xorl %k[output],%k[output]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_UA(1b, 3b) \
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \
+ EX_FLAG_CLEAR_AX, \
+ %[errout]) \
: [errout] "=r" (err), \
- [output] ltype(x) \
+ [output] "=a" (x) \
: [umem] "m" (__m(addr)), \
- [efault] "i" (-EFAULT), "0" (err))
+ "0" (err))
#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
+#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
+ bool success; \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
+ asm_volatile_goto("\n" \
+ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
+ _ASM_EXTABLE_UA(1b, %l[label]) \
+ : CC_OUT(z) (success), \
+ [ptr] "+m" (*_ptr), \
+ [old] "+a" (__old) \
+ : [new] ltype (__new) \
+ : "memory" \
+ : label); \
+ if (unlikely(!success)) \
+ *_old = __old; \
+ likely(success); })
+
+#ifdef CONFIG_X86_32
+#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
+ bool success; \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
+ asm_volatile_goto("\n" \
+ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
+ _ASM_EXTABLE_UA(1b, %l[label]) \
+ : CC_OUT(z) (success), \
+ "+A" (__old), \
+ [ptr] "+m" (*_ptr) \
+ : "b" ((u32)__new), \
+ "c" ((u32)((u64)__new >> 32)) \
+ : "memory" \
+ : label); \
+ if (unlikely(!success)) \
+ *_old = __old; \
+ likely(success); })
+#endif // CONFIG_X86_32
+#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
+#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
+ int __err = 0; \
+ bool success; \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
+ asm volatile("\n" \
+ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
+ CC_SET(z) \
+ "2:\n" \
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \
+ %[errout]) \
+ : CC_OUT(z) (success), \
+ [errout] "+r" (__err), \
+ [ptr] "+m" (*_ptr), \
+ [old] "+a" (__old) \
+ : [new] ltype (__new) \
+ : "memory"); \
+ if (unlikely(__err)) \
+ goto label; \
+ if (unlikely(!success)) \
+ *_old = __old; \
+ likely(success); })
+
+#ifdef CONFIG_X86_32
+/*
+ * Unlike the normal CMPXCHG, use output GPR for both success/fail and error.
+ * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are
+ * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses
+ * both ESI and EDI for the memory operand, compilation will fail if the error
+ * is an input+output as there will be no register available for input.
+ */
+#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
+ int __result; \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
+ __typeof__(*(_ptr)) __old = *_old; \
+ __typeof__(*(_ptr)) __new = (_new); \
+ asm volatile("\n" \
+ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
+ "mov $0, %[result]\n\t" \
+ "setz %b[result]\n" \
+ "2:\n" \
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \
+ %[result]) \
+ : [result] "=q" (__result), \
+ "+A" (__old), \
+ [ptr] "+m" (*_ptr) \
+ : "b" ((u32)__new), \
+ "c" ((u32)((u64)__new >> 32)) \
+ : "memory", "cc"); \
+ if (unlikely(__result < 0)) \
+ goto label; \
+ if (unlikely(!__result)) \
+ *_old = __old; \
+ likely(__result); })
+#endif // CONFIG_X86_32
+#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
+
/* FIXME: this hack is definitely wrong -AK */
struct __large_struct { unsigned long buf[100]; };
#define __m(x) (*(struct __large_struct __user *)(x))
@@ -436,9 +512,6 @@ strncpy_from_user(char *dst, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
-unsigned long __must_check clear_user(void __user *mem, unsigned long len);
-unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
-
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned len);
@@ -460,6 +533,8 @@ extern struct movsl_mask {
#define ARCH_HAS_NOCACHE_UACCESS 1
#ifdef CONFIG_X86_32
+unsigned long __must_check clear_user(void __user *mem, unsigned long len);
+unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
# include <asm/uaccess_32.h>
#else
# include <asm/uaccess_64.h>
@@ -505,6 +580,51 @@ do { \
} while (0)
#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+extern void __try_cmpxchg_user_wrong_size(void);
+
+#ifndef CONFIG_X86_32
+#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \
+ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label)
+#endif
+
+/*
+ * Force the pointer to u<size> to match the size expected by the asm helper.
+ * clang/LLVM compiles all cases and only discards the unused paths after
+ * processing errors, which breaks i386 if the pointer is an 8-byte value.
+ */
+#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \
+ bool __ret; \
+ __chk_user_ptr(_ptr); \
+ switch (sizeof(*(_ptr))) { \
+ case 1: __ret = __try_cmpxchg_user_asm("b", "q", \
+ (__force u8 *)(_ptr), (_oldp), \
+ (_nval), _label); \
+ break; \
+ case 2: __ret = __try_cmpxchg_user_asm("w", "r", \
+ (__force u16 *)(_ptr), (_oldp), \
+ (_nval), _label); \
+ break; \
+ case 4: __ret = __try_cmpxchg_user_asm("l", "r", \
+ (__force u32 *)(_ptr), (_oldp), \
+ (_nval), _label); \
+ break; \
+ case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\
+ (_nval), _label); \
+ break; \
+ default: __try_cmpxchg_user_wrong_size(); \
+ } \
+ __ret; })
+
+/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */
+#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \
+ int __ret = -EFAULT; \
+ __uaccess_begin_nospec(); \
+ __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \
+_label: \
+ __uaccess_end(); \
+ __ret; \
+ })
+
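Purely as an illustration of the new helpers (a hypothetical sketch, not taken from this patch; set_user_flag() is an invented name), a caller could loop on unsafe_try_cmpxchg_user() like this:

/* Sketch only: atomically OR a flag into a user-space word. */
static int set_user_flag(u32 __user *uptr, u32 flag)
{
	u32 old;

	if (!user_access_begin(uptr, sizeof(*uptr)))
		return -EFAULT;
	unsafe_get_user(old, uptr, efault);
	do {
		/* On CAS failure 'old' is refreshed, so the OR is recomputed. */
	} while (!unsafe_try_cmpxchg_user(uptr, &old, old | flag, efault));
	user_access_end();
	return 0;

efault:
	user_access_end();
	return -EFAULT;
}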
/*
* We want the unsafe accessors to always be inlined and use
* the error labels - thus the macro games.
@@ -528,8 +648,6 @@ do { \
unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
} while (0)
-#define HAVE_GET_KERNEL_NOFAULT
-
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
#define __get_kernel_nofault(dst, src, type, err_label) \
__get_user_size(*((type *)(dst)), (__force type __user *)(src), \
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 45697e04d771..d13d71af5cf6 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -79,4 +79,49 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
+
+/*
+ * Zero Userspace.
+ */
+
+__must_check unsigned long
+clear_user_original(void __user *addr, unsigned long len);
+__must_check unsigned long
+clear_user_rep_good(void __user *addr, unsigned long len);
+__must_check unsigned long
+clear_user_erms(void __user *addr, unsigned long len);
+
+static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+ might_fault();
+ stac();
+
+ /*
+ * No memory constraint because it doesn't change any memory gcc
+ * knows about.
+ */
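+ /*
+ * The alternatives pick, in order of preference: FSRM ("rep stosb"),
+ * then ERMS, then REP_GOOD, and finally the original routine.
+ */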
+ asm volatile(
+ "1:\n\t"
+ ALTERNATIVE_3("rep stosb",
+ "call clear_user_erms", ALT_NOT(X86_FEATURE_FSRM),
+ "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS),
+ "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD))
+ "2:\n"
+ _ASM_EXTABLE_UA(1b, 2b)
+ : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
+ : "a" (0)
+ /* rep_good clobbers %rdx */
+ : "rdx");
+
+ clac();
+
+ return size;
+}
+
+static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
+{
+ if (access_ok(to, n))
+ return __clear_user(to, n);
+ return n;
+}
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 80e9d5206a71..761173ccc33c 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -22,6 +22,7 @@
# include <asm/unistd_32_ia32.h>
# define __ARCH_WANT_SYS_TIME
# define __ARCH_WANT_SYS_UTIME
+# define __ARCH_WANT_COMPAT_STAT
# define __ARCH_WANT_COMPAT_SYS_PREADV64
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64
# define __ARCH_WANT_COMPAT_SYS_PREADV64V2
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 2a1f8734416d..7cede4dc21f0 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -4,7 +4,7 @@
#include <linux/sched.h>
#include <linux/ftrace.h>
-#include <linux/kprobes.h>
+#include <linux/rethook.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
@@ -16,7 +16,7 @@ struct unwind_state {
unsigned long stack_mask;
struct task_struct *task;
int graph_idx;
-#ifdef CONFIG_KRETPROBES
+#if defined(CONFIG_RETHOOK)
struct llist_node *kr_cur;
#endif
bool error;
@@ -104,19 +104,18 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
#endif
static inline
-unsigned long unwind_recover_kretprobe(struct unwind_state *state,
- unsigned long addr, unsigned long *addr_p)
+unsigned long unwind_recover_rethook(struct unwind_state *state,
+ unsigned long addr, unsigned long *addr_p)
{
-#ifdef CONFIG_KRETPROBES
- return is_kretprobe_trampoline(addr) ?
- kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) :
- addr;
-#else
- return addr;
+#ifdef CONFIG_RETHOOK
+ if (is_rethook_trampoline(addr))
+ return rethook_find_ret_addr(state->task, (unsigned long)addr_p,
+ &state->kr_cur);
#endif
+ return addr;
}
-/* Recover the return address modified by kretprobe and ftrace_graph. */
+/* Recover the return address modified by rethook and ftrace_graph. */
static inline
unsigned long unwind_recover_ret_addr(struct unwind_state *state,
unsigned long addr, unsigned long *addr_p)
@@ -125,7 +124,7 @@ unsigned long unwind_recover_ret_addr(struct unwind_state *state,
ret = ftrace_graph_ret_addr(state->task, &state->graph_idx,
addr, addr_p);
- return unwind_recover_kretprobe(state, ret, addr_p);
+ return unwind_recover_rethook(state, ret, addr_p);
}
/*
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 8b33674288ea..f66fbe6537dd 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -8,7 +8,11 @@
#ifdef __ASSEMBLY__
.macro UNWIND_HINT_EMPTY
- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
+ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
+.endm
+
+.macro UNWIND_HINT_ENTRY
+ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
.endm
.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
@@ -52,6 +56,14 @@
UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
.endm
+.macro UNWIND_HINT_SAVE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
+.endm
+
+.macro UNWIND_HINT_RESTORE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+.endm
+
#else
#define UNWIND_HINT_FUNC \
diff --git a/arch/x86/include/asm/user_32.h b/arch/x86/include/asm/user_32.h
index d72c3d66e94f..8963915e533f 100644
--- a/arch/x86/include/asm/user_32.h
+++ b/arch/x86/include/asm/user_32.h
@@ -124,9 +124,5 @@ struct user{
char u_comm[32]; /* User command that was responsible */
int u_debugreg[8];
};
-#define NBPG PAGE_SIZE
-#define UPAGES 1
-#define HOST_TEXT_START_ADDR (u.start_code)
-#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
#endif /* _ASM_X86_USER_32_H */
diff --git a/arch/x86/include/asm/user_64.h b/arch/x86/include/asm/user_64.h
index db909923611c..1dd10f07ccd6 100644
--- a/arch/x86/include/asm/user_64.h
+++ b/arch/x86/include/asm/user_64.h
@@ -130,9 +130,5 @@ struct user {
unsigned long error_code; /* CPU error code or 0 */
unsigned long fault_address; /* CR3 or 0 */
};
-#define NBPG PAGE_SIZE
-#define UPAGES 1
-#define HOST_TEXT_START_ADDR (u.start_code)
-#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
#endif /* _ASM_X86_USER_64_H */
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 98aa103eb4ab..2963a2f5dbc4 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -37,7 +37,7 @@ struct vdso_image {
extern const struct vdso_image vdso_image_64;
#endif
-#ifdef CONFIG_X86_X32
+#ifdef CONFIG_X86_X32_ABI
extern const struct vdso_image vdso_image_x32;
#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 0ffaa3156a4e..498dc600bd5c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -31,6 +31,7 @@
#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING)
#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING)
#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING)
+#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS VMCS_CONTROL_BIT(TERTIARY_CONTROLS)
#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING)
#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING)
#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR)
@@ -74,6 +75,12 @@
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION)
+#define SECONDARY_EXEC_NOTIFY_VM_EXITING VMCS_CONTROL_BIT(NOTIFY_VM_EXITING)
+
+/*
+ * Definitions of Tertiary Processor-Based VM-Execution Controls.
+ */
+#define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT)
#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
@@ -158,6 +165,7 @@ static inline int vmx_misc_mseg_revid(u64 vmx_misc)
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
POSTED_INTR_NV = 0x00000002,
+ LAST_PID_POINTER_INDEX = 0x00000008,
GUEST_ES_SELECTOR = 0x00000800,
GUEST_CS_SELECTOR = 0x00000802,
GUEST_SS_SELECTOR = 0x00000804,
@@ -221,6 +229,10 @@ enum vmcs_field {
ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
+ TERTIARY_VM_EXEC_CONTROL = 0x00002034,
+ TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035,
+ PID_POINTER_TABLE = 0x00002042,
+ PID_POINTER_TABLE_HIGH = 0x00002043,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
@@ -269,6 +281,7 @@ enum vmcs_field {
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
PLE_GAP = 0x00004020,
PLE_WINDOW = 0x00004022,
+ NOTIFY_WINDOW = 0x00004024,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
@@ -296,7 +309,7 @@ enum vmcs_field {
GUEST_LDTR_AR_BYTES = 0x00004820,
GUEST_TR_AR_BYTES = 0x00004822,
GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
- GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_ACTIVITY_STATE = 0x00004826,
GUEST_SYSENTER_CS = 0x0000482A,
VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
HOST_IA32_SYSENTER_CS = 0x00004c00,
@@ -543,19 +556,22 @@ enum vm_entry_failure_code {
#define EPT_VIOLATION_ACC_READ_BIT 0
#define EPT_VIOLATION_ACC_WRITE_BIT 1
#define EPT_VIOLATION_ACC_INSTR_BIT 2
-#define EPT_VIOLATION_READABLE_BIT 3
-#define EPT_VIOLATION_WRITABLE_BIT 4
-#define EPT_VIOLATION_EXECUTABLE_BIT 5
+#define EPT_VIOLATION_RWX_SHIFT 3
+#define EPT_VIOLATION_GVA_IS_VALID_BIT 7
#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
-#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
-#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
-#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT)
+#define EPT_VIOLATION_RWX_MASK (VMX_EPT_RWX_MASK << EPT_VIOLATION_RWX_SHIFT)
+#define EPT_VIOLATION_GVA_IS_VALID (1 << EPT_VIOLATION_GVA_IS_VALID_BIT)
#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
/*
+ * Exit Qualifications for NOTIFY VM EXIT
+ */
+#define NOTIFY_VM_CONTEXT_INVALID BIT(0)
+
+/*
* VM-instruction error numbers
*/
enum vm_instruction_error_number {
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index d9a74681a77d..c6a7eed03914 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -5,7 +5,7 @@
/*
* Defines VMX CPU feature bits
*/
-#define NVMXINTS 3 /* N 32-bit words worth of info */
+#define NVMXINTS 5 /* N 32-bit words worth of info */
/*
* Note: If the comment begins with a quoted string, that string is used
@@ -43,6 +43,7 @@
#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */
#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */
#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */
+#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */
#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
@@ -84,5 +85,8 @@
#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */
+#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */
+/* Tertiary Processor-Based VM-Execution Controls, word 3 */
+#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */
#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index 06006b0351f3..46b4f1f7f354 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -79,27 +79,15 @@ static inline unsigned long find_zero(unsigned long mask)
*/
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
- unsigned long ret, dummy;
+ unsigned long ret;
- asm(
- "1:\tmov %2,%0\n"
+ asm volatile(
+ "1: mov %[mem], %[ret]\n"
"2:\n"
- ".section .fixup,\"ax\"\n"
- "3:\t"
- "lea %2,%1\n\t"
- "and %3,%1\n\t"
- "mov (%1),%0\n\t"
- "leal %2,%%ecx\n\t"
- "andl %4,%%ecx\n\t"
- "shll $3,%%ecx\n\t"
- "shr %%cl,%0\n\t"
- "jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- :"=&r" (ret),"=&c" (dummy)
- :"m" (*(unsigned long *)addr),
- "i" (-sizeof(unsigned long)),
- "i" (sizeof(unsigned long)-1));
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD)
+ : [ret] "=r" (ret)
+ : [mem] "m" (*(unsigned long *)addr));
+
return ret;
}
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 5c69f7eb5d47..e9170457697e 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,21 @@ struct x86_init_acpi {
};
/**
+ * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc.
+ *
+ * @enc_status_change_prepare Notify HV before the encryption status of a range is changed
+ * @enc_status_change_finish Notify HV after the encryption status of a range is changed
+ * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status
+ * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
+ */
+struct x86_guest {
+ void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+ bool (*enc_tlb_flush_required)(bool enc);
+ bool (*enc_cache_flush_required)(void);
+};
+
+/**
* struct x86_init_ops - functions for platform specific setup
*
*/
@@ -287,12 +302,7 @@ struct x86_platform_ops {
struct x86_legacy_features legacy;
void (*set_legacy_features)(void);
struct x86_hyper_runtime hyper;
-};
-
-struct pci_dev;
-
-struct x86_msi_ops {
- void (*restore_msi_irqs)(struct pci_dev *dev);
+ struct x86_guest guest;
};
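To illustrate how these hooks are meant to be consumed (a hypothetical sketch under assumed names, not code from this patch), a page-encryption-status change could be wrapped roughly as:

/* Sketch only: consult the guest hooks around a private<->shared conversion. */
static int example_set_memory_enc(unsigned long vaddr, int npages, bool enc)
{
	/* Flush the TLB first if the guest type requires it. */
	if (x86_platform.guest.enc_tlb_flush_required(enc))
		flush_tlb_all();

	x86_platform.guest.enc_status_change_prepare(vaddr, npages, enc);
	/* ... change the encryption attribute in the page tables here ... */
	if (!x86_platform.guest.enc_status_change_finish(vaddr, npages, enc))
		return -EIO;

	return 0;
}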
struct x86_apic_ops {
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index a9630104f1c4..6daa9b0c8d11 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -100,6 +100,15 @@
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */
+/*
+ * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be
+ * used to store high bits for the Destination ID. This expands the Destination
+ * ID field from 8 to 15 bits, allowing APIC IDs up to 32768 to be targeted.
+ */
+#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5)
+/* Per-vCPU event channel upcalls */
+#define XEN_HVM_CPUID_UPCALL_VECTOR (1u << 6)
/*
* Leaf 6 (0x40000x05)
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index 068d9b067c83..62bdceb594f1 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -23,7 +23,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
/* No need for a barrier -- XCHG is a barrier on x86. */
#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
-extern int xen_have_vector_callback;
+extern bool xen_have_vector_callback;
/*
* Events delivered via platform PCI interrupts are always
@@ -34,4 +34,5 @@ static inline bool xen_support_evtchn_rebind(void)
return (!xen_hvm_domain() || xen_have_vector_callback);
}
+extern bool xen_percpu_upcall;
#endif /* _ASM_X86_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 5adab895127e..16f548a661cf 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -43,18 +43,12 @@ static inline uint32_t xen_cpuid_base(void)
return hypervisor_cpuid_base("XenVMMXenVMM", 2);
}
-#ifdef CONFIG_XEN
-extern bool __init xen_hvm_need_lapic(void);
+struct pci_dev;
-static inline bool __init xen_x2apic_para_available(void)
-{
- return xen_hvm_need_lapic();
-}
+#ifdef CONFIG_XEN_PV_DOM0
+bool xen_initdom_restore_msi(struct pci_dev *dev);
#else
-static inline bool __init xen_x2apic_para_available(void)
-{
- return (xen_cpuid_base() != 0);
-}
+static inline bool xen_initdom_restore_msi(struct pci_dev *dev) { return true; }
#endif
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
deleted file mode 100644
index 63cd41b2e17a..000000000000
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_XEN_PAGE_COHERENT_H
-#define _ASM_X86_XEN_PAGE_COHERENT_H
-
-#include <asm/page.h>
-#include <linux/dma-mapping.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs)
-{
- void *vstart = (void*)__get_free_pages(flags, get_order(size));
- *dma_handle = virt_to_phys(vstart);
- return vstart;
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 1a162e559753..fa9ec20783fa 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -96,11 +96,7 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
asm volatile("1: mov %[val], %[ptr]\n"
"2:\n"
- ".section .fixup, \"ax\"\n"
- "3: sub $1, %[ret]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret])
: [ret] "+r" (ret), [ptr] "=m" (*addr)
: [val] "r" (val));
@@ -110,16 +106,12 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
static inline int xen_safe_read_ulong(const unsigned long *addr,
unsigned long *val)
{
- int ret = 0;
unsigned long rval = ~0ul;
+ int ret = 0;
asm volatile("1: mov %[ptr], %[rval]\n"
"2:\n"
- ".section .fixup, \"ax\"\n"
- "3: sub $1, %[ret]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret])
: [ret] "+r" (ret), [rval] "+r" (rval)
: [ptr] "m" (*addr));
*val = rval;
@@ -355,9 +347,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr);
void make_lowmem_page_readonly(void *vaddr);
void make_lowmem_page_readwrite(void *vaddr);
-#define xen_remap(cookie, size) ioremap((cookie), (size))
-#define xen_unmap(cookie) iounmap((cookie))
-
static inline bool xen_arch_need_swiotlb(struct device *dev,
phys_addr_t phys,
dma_addr_t dev_addr)
@@ -365,9 +354,4 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
return false;
}
-static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
-{
- return __get_free_pages(__GFP_NOWARN, order);
-}
-
#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
index 66b4ddde7743..77a2d19cc990 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -3,11 +3,15 @@
#define _ASM_X86_SWIOTLB_XEN_H
#ifdef CONFIG_SWIOTLB_XEN
-extern int __init pci_xen_swiotlb_detect(void);
extern int pci_xen_swiotlb_init_late(void);
#else
-#define pci_xen_swiotlb_detect NULL
static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
#endif
+int xen_swiotlb_fixup(void *buf, unsigned long nslabs);
+int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
+ unsigned int address_bits,
+ dma_addr_t *dma_handle);
+void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
+
#endif /* _ASM_X86_SWIOTLB_XEN_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 2ee95a7769e6..7b0307acc410 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -57,7 +57,8 @@
op(i + 3, 3)
static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 8;
@@ -108,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 8;
@@ -142,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 8;
@@ -201,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 8;
@@ -238,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 8;
@@ -304,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 8;
@@ -343,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 8;
@@ -416,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 8;
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 67ceb790e639..7a6b9474591e 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -21,7 +21,8 @@
#include <asm/fpu/api.h>
static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 7;
@@ -64,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 7;
@@ -113,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 7;
@@ -168,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 7;
@@ -248,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
#undef BLOCK
static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 6;
@@ -295,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 6;
@@ -352,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 6;
@@ -418,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
unsigned long lines = bytes >> 6;
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 0c4e5b5e3852..7f81dd5897f4 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -26,7 +26,8 @@
BLOCK4(8) \
BLOCK4(12)
-static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
+static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1)
{
unsigned long lines = bytes >> 9;
@@ -52,8 +53,9 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2)
+static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
unsigned long lines = bytes >> 9;
@@ -82,8 +84,10 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2, unsigned long *p3)
+static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
unsigned long lines = bytes >> 9;
@@ -115,8 +119,11 @@ do { \
kernel_fpu_end();
}
-static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
- unsigned long *p2, unsigned long *p3, unsigned long *p4)
+static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
+ const unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
unsigned long lines = bytes >> 9;
diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
new file mode 100644
index 000000000000..769b939444ae
--- /dev/null
+++ b/arch/x86/include/uapi/asm/amd_hsmp.h
@@ -0,0 +1,307 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_ASM_X86_AMD_HSMP_H_
+#define _UAPI_ASM_X86_AMD_HSMP_H_
+
+#include <linux/types.h>
+
+#pragma pack(4)
+
+#define HSMP_MAX_MSG_LEN 8
+
+/*
+ * HSMP Messages supported
+ */
+enum hsmp_message_ids {
+ HSMP_TEST = 1, /* 01h Increments input value by 1 */
+ HSMP_GET_SMU_VER, /* 02h SMU FW version */
+ HSMP_GET_PROTO_VER, /* 03h HSMP interface version */
+ HSMP_GET_SOCKET_POWER, /* 04h average package power consumption */
+ HSMP_SET_SOCKET_POWER_LIMIT, /* 05h Set the socket power limit */
+ HSMP_GET_SOCKET_POWER_LIMIT, /* 06h Get current socket power limit */
+ HSMP_GET_SOCKET_POWER_LIMIT_MAX,/* 07h Get maximum socket power value */
+ HSMP_SET_BOOST_LIMIT, /* 08h Set a core maximum frequency limit */
+ HSMP_SET_BOOST_LIMIT_SOCKET, /* 09h Set socket maximum frequency level */
+ HSMP_GET_BOOST_LIMIT, /* 0Ah Get current frequency limit */
+ HSMP_GET_PROC_HOT, /* 0Bh Get PROCHOT status */
+ HSMP_SET_XGMI_LINK_WIDTH, /* 0Ch Set max and min width of xGMI Link */
+ HSMP_SET_DF_PSTATE, /* 0Dh Alter APEnable/Disable messages behavior */
+ HSMP_SET_AUTO_DF_PSTATE, /* 0Eh Enable DF P-State Performance Boost algorithm */
+ HSMP_GET_FCLK_MCLK, /* 0Fh Get FCLK and MEMCLK for current socket */
+ HSMP_GET_CCLK_THROTTLE_LIMIT, /* 10h Get CCLK frequency limit in socket */
+ HSMP_GET_C0_PERCENT, /* 11h Get average C0 residency in socket */
+ HSMP_SET_NBIO_DPM_LEVEL, /* 12h Set max/min LCLK DPM Level for a given NBIO */
+ HSMP_GET_NBIO_DPM_LEVEL, /* 13h Get LCLK DPM level min and max for a given NBIO */
+ HSMP_GET_DDR_BANDWIDTH, /* 14h Get theoretical maximum and current DDR Bandwidth */
+ HSMP_GET_TEMP_MONITOR, /* 15h Get socket temperature */
+ HSMP_GET_DIMM_TEMP_RANGE, /* 16h Get per-DIMM temperature range and refresh rate */
+ HSMP_GET_DIMM_POWER, /* 17h Get per-DIMM power consumption */
+ HSMP_GET_DIMM_THERMAL, /* 18h Get per-DIMM thermal sensors */
+ HSMP_GET_SOCKET_FREQ_LIMIT, /* 19h Get current active frequency per socket */
+ HSMP_GET_CCLK_CORE_LIMIT, /* 1Ah Get CCLK frequency limit per core */
+ HSMP_GET_RAILS_SVI, /* 1Bh Get SVI-based Telemetry for all rails */
+ HSMP_GET_SOCKET_FMAX_FMIN, /* 1Ch Get Fmax and Fmin per socket */
+ HSMP_GET_IOLINK_BANDWITH, /* 1Dh Get current bandwidth on IO Link */
+ HSMP_GET_XGMI_BANDWITH, /* 1Eh Get current bandwidth on xGMI Link */
+ HSMP_SET_GMI3_WIDTH, /* 1Fh Set max and min GMI3 Link width */
+ HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */
+ HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */
+ HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */
+ HSMP_MSG_ID_MAX,
+};
+
+struct hsmp_message {
+ __u32 msg_id; /* Message ID */
+ __u16 num_args; /* Number of input argument words in message */
+ __u16 response_sz; /* Number of expected output/response words */
+ __u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */
+ __u16 sock_ind; /* socket number */
+};
+
+enum hsmp_msg_type {
+ HSMP_RSVD = -1,
+ HSMP_SET = 0,
+ HSMP_GET = 1,
+};
+
+struct hsmp_msg_desc {
+ int num_args;
+ int response_sz;
+ enum hsmp_msg_type type;
+};
+
+/*
+ * These comments are for reference only; the authoritative list of
+ * supported messages and their definitions is in the HSMP chapter of
+ * the respective family/model PPR.
+ *
+ * Unsupported messages return -ENOMSG.
+ */
+static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
+ /* RESERVED */
+ {0, 0, HSMP_RSVD},
+
+ /*
+ * HSMP_TEST, num_args = 1, response_sz = 1
+ * input: args[0] = xx
+ * output: args[0] = xx + 1
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SMU_VER, num_args = 0, response_sz = 1
+ * output: args[0] = smu fw ver
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1
+ * output: args[0] = proto version
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1
+ * output: args[0] = socket power in mWatts
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0
+ * input: args[0] = power limit value in mWatts
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1
+ * output: args[0] = socket power limit value in mWatts
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1
+ * output: args[0] = maximum socket power limit in mWatts
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0
+ * input: args[0] = apic id[31:16] + boost limit value in MHz[15:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0
+ * input: args[0] = boost limit value in MHz
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1
+ * input: args[0] = apic id
+ * output: args[0] = boost limit value in MHz
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1
+ * output: args[0] = proc hot status
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0
+ * input: args[0] = min link width[15:8] + max link width[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0
+ * input: args[0] = df pstate[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */
+ {0, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2
+ * output: args[0] = fclk in MHz, args[1] = mclk in MHz
+ */
+ {0, 2, HSMP_GET},
+
+ /*
+ * HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz = 1
+ * output: args[0] = core clock in MHz
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1
+ * output: args[0] = average c0 residency
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0
+ * input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1
+ * input: args[0] = nbioid[23:16]
+ * output: args[0] = max dpm level[15:8] + min dpm level[7:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1
+ * output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] +
+ * bw in percentage[7:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1
+ * output: args[0] = temperature in degree celsius. [15:8] integer part +
+ * [7:5] fractional part
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = refresh rate[3] + temperature range[2:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] +
+ * DIMM address[7:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1
+ * input: args[0] = DIMM address[7:0]
+ * output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] +
+ * DIMM address[7:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1
+ * output: args[0] = frequency in MHz[31:16] + frequency source[15:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1
+ * input: args[0] = apic id [31:0]
+ * output: args[0] = frequency in MHz[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1
+ * output: args[0] = power in mW[31:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1
+ * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1
+ * input: args[0] = link id[15:8] + bw type[2:0]
+ * output: args[0] = io bandwidth in Mbps[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1
+ * input: args[0] = link id[15:8] + bw type[2:0]
+ * output: args[0] = xgmi bandwidth in Mbps[31:0]
+ */
+ {1, 1, HSMP_GET},
+
+ /*
+ * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0
+ * input: args[0] = min link width[15:8] + max link width[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1
+ * input: args[0] = link rate control value
+ * output: args[0] = previous link rate control value
+ */
+ {1, 1, HSMP_SET},
+
+ /*
+ * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0
+ * input: args[0] = power efficiency mode[2:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
+ * input: args[0] = min df pstate[15:8] + max df pstate[7:0]
+ */
+ {1, 0, HSMP_SET},
+};
+
+/* Reset to default packing */
+#pragma pack()
+
+/* Define unique ioctl command for hsmp msgs using generic _IOWR */
+#define HSMP_BASE_IOCTL_NR 0xF8
+#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message)
+
+#endif /*_ASM_X86_AMD_HSMP_H_*/
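A hedged userspace sketch of driving this interface (not part of the header; the /dev/hsmp device node name is an assumption of this example):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/amd_hsmp.h>

int main(void)
{
	struct hsmp_message msg = {
		.msg_id      = HSMP_GET_SMU_VER,	/* 0 input args, 1 response word */
		.response_sz = 1,
	};
	int fd = open("/dev/hsmp", O_RDWR);	/* assumed device node */

	if (fd < 0)
		return 1;
	if (ioctl(fd, HSMP_IOCTL_CMD, &msg) < 0) {
		close(fd);
		return 1;
	}
	printf("SMU firmware version: 0x%x\n", msg.args[0]);
	close(fd);
	return 0;
}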
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index b25d3f82c2f3..01d19fc22346 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -10,11 +10,13 @@
#define SETUP_EFI 4
#define SETUP_APPLE_PROPERTIES 5
#define SETUP_JAILHOUSE 6
+#define SETUP_CC_BLOB 7
+#define SETUP_IMA 8
+#define SETUP_RNG_SEED 9
+#define SETUP_ENUM_MAX SETUP_RNG_SEED
#define SETUP_INDIRECT (1<<31)
-
-/* SETUP_INDIRECT | max(SETUP_*) */
-#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE)
+#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
@@ -51,7 +53,7 @@ struct setup_data {
__u64 next;
__u32 type;
__u32 len;
- __u8 data[0];
+ __u8 data[];
};
/* extensible setup indirect data node */
@@ -171,6 +173,14 @@ struct jailhouse_setup_data {
} __attribute__((packed)) v2;
} __attribute__((packed));
+/*
+ * IMA buffer setup data information from the previous kernel during kexec
+ */
+struct ima_setup_data {
+ __u64 addr;
+ __u64 size;
+} __attribute__((packed));
+
/* The so-called "zeropage" */
struct boot_params {
struct screen_info screen_info; /* 0x000 */
@@ -187,7 +197,8 @@ struct boot_params {
__u32 ext_ramdisk_image; /* 0x0c0 */
__u32 ext_ramdisk_size; /* 0x0c4 */
__u32 ext_cmd_line_ptr; /* 0x0c8 */
- __u8 _pad4[116]; /* 0x0cc */
+ __u8 _pad4[112]; /* 0x0cc */
+ __u32 cc_blob_address; /* 0x13c */
struct edid_info edid_info; /* 0x140 */
struct efi_info efi_info; /* 0x1c0 */
__u32 alt_mem_k; /* 0x1e0 */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5a776a08f78c..46de10a809ec 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -198,13 +198,13 @@ struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
- struct kvm_msr_entry entries[0];
+ struct kvm_msr_entry entries[];
};
/* for KVM_GET_MSR_INDEX_LIST */
struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
- __u32 indices[0];
+ __u32 indices[];
};
/* Maximum size of any access bitmap in bytes */
@@ -241,7 +241,7 @@ struct kvm_cpuid_entry {
struct kvm_cpuid {
__u32 nent;
__u32 padding;
- struct kvm_cpuid_entry entries[0];
+ struct kvm_cpuid_entry entries[];
};
struct kvm_cpuid_entry2 {
@@ -263,7 +263,7 @@ struct kvm_cpuid_entry2 {
struct kvm_cpuid2 {
__u32 nent;
__u32 padding;
- struct kvm_cpuid_entry2 entries[0];
+ struct kvm_cpuid_entry2 entries[];
};
/* for KVM_GET_PIT and KVM_SET_PIT */
@@ -306,7 +306,8 @@ struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
-#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
+#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
+#define KVM_PIT_FLAGS_SPEAKER_DATA_ON 0x00000002
struct kvm_pit_state2 {
struct kvm_pit_channel_state channels[3];
@@ -325,6 +326,7 @@ struct kvm_reinject_control {
#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
#define KVM_VCPUEVENT_VALID_SMM 0x00000008
#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010
+#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020
/* Interrupt shadow states */
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
@@ -359,7 +361,10 @@ struct kvm_vcpu_events {
__u8 smm_inside_nmi;
__u8 latched_init;
} smi;
- __u8 reserved[27];
+ struct {
+ __u8 pending;
+ } triple_fault;
+ __u8 reserved[26];
__u8 exception_has_payload;
__u64 exception_payload;
};
@@ -373,9 +378,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
-/* for KVM_CAP_XSAVE */
+/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
+ /*
+ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * respectively, when invoked on the vm file descriptor.
+ *
+ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * will always be at least 4096. Currently, it is only greater
+ * than 4096 if a dynamic feature has been enabled with
+ * ``arch_prctl()``, but this may change in the future.
+ *
+ * The offsets of the state save areas in struct kvm_xsave follow
+ * the contents of CPUID leaf 0xD on the host.
+ */
__u32 region[1024];
+ __u32 extra[];
};
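As a usage illustration only (a hypothetical userspace sketch; vm_fd and vcpu_fd are assumed to be already-created KVM file descriptors), the size negotiation described in the comment above could look like:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_xsave *read_xsave2(int vm_fd, int vcpu_fd)
{
	/* The VM fd reports how large the KVM_GET_XSAVE2 buffer must be. */
	int size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE2);
	struct kvm_xsave *xs;

	if (size < (int)sizeof(*xs))
		size = sizeof(*xs);	/* never smaller than 4096 bytes */

	xs = calloc(1, size);
	if (xs && ioctl(vcpu_fd, KVM_GET_XSAVE2, xs) < 0) {
		free(xs);
		xs = NULL;
	}
	return xs;
}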
#define KVM_MAX_XCRS 16
@@ -414,11 +433,13 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
-#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
-#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
-#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
-#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
-#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
+#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
+#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
@@ -438,6 +459,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP 0
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -498,7 +522,7 @@ struct kvm_pmu_event_filter {
__u32 fixed_counter_bitmap;
__u32 flags;
__u32 pad[4];
- __u64 events[0];
+ __u64 events[];
};
#define KVM_PMU_EVENT_ALLOW 0
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index d4a8d0424bfb..775dbd3aff73 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,20 +5,6 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-/*
- * Take the 4 protection key bits out of the vma->vm_flags
- * value and turn them in to the bits that we can put in
- * to a pte.
- *
- * Only override these if Protection Keys are available
- * (which is only on 64-bit).
- */
-#define arch_vm_get_page_prot(vm_flags) __pgprot( \
- ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \
- ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \
- ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \
- ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0))
-
#define arch_calc_vm_prot_bits(prot, key) ( \
((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \
((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 754a07856817..500b96e71f18 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -2,20 +2,22 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
-#define ARCH_GET_CPUID 0x1011
-#define ARCH_SET_CPUID 0x1012
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
-#define ARCH_GET_XCOMP_SUPP 0x1021
-#define ARCH_GET_XCOMP_PERM 0x1022
-#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_SUPP 0x1021
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
-#define ARCH_MAP_VDSO_X32 0x2001
-#define ARCH_MAP_VDSO_32 0x2002
-#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index bcba3c643e63..c47cc7f2feeb 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -130,6 +130,8 @@
#define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT)
#define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */
#define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT)
+#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */
+#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT)
/*
* x86-64 Task Priority Register, CR8
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index f4b81587e90b..2dd35bbdc822 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -29,6 +29,12 @@ enum sgx_page_flags {
_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision)
#define SGX_IOC_VEPC_REMOVE_ALL \
_IO(SGX_MAGIC, 0x04)
+#define SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS \
+ _IOWR(SGX_MAGIC, 0x05, struct sgx_enclave_restrict_permissions)
+#define SGX_IOC_ENCLAVE_MODIFY_TYPES \
+ _IOWR(SGX_MAGIC, 0x06, struct sgx_enclave_modify_types)
+#define SGX_IOC_ENCLAVE_REMOVE_PAGES \
+ _IOWR(SGX_MAGIC, 0x07, struct sgx_enclave_remove_pages)
/**
* struct sgx_enclave_create - parameter structure for the
@@ -76,6 +82,62 @@ struct sgx_enclave_provision {
__u64 fd;
};
+/**
+ * struct sgx_enclave_restrict_permissions - parameters for ioctl
+ * %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @permissions: new permission bits for pages in range described by @offset
+ *              and @length
+ * @result: (output) SGX result code of ENCLS[EMODPR] function
+ * @count: (output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_restrict_permissions {
+ __u64 offset;
+ __u64 length;
+ __u64 permissions;
+ __u64 result;
+ __u64 count;
+};
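
For illustration, a minimal userspace sketch of driving this new ioctl. The enclave fd, the page range, and the numeric permission encoding (bit 0 taken as the read bit of the EPCM permission mask) are assumptions of this example, not something defined by the patch itself:

/*
 * Hypothetical sketch: restrict a range of an initialized enclave to
 * read-only via SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <asm/sgx.h>

static int restrict_to_read_only(int encl_fd, __u64 offset, __u64 length)
{
	struct sgx_enclave_restrict_permissions params = {
		.offset = offset,	/* page aligned, relative to enclave base */
		.length = length,	/* multiple of the page size */
		.permissions = 0x01,	/* assumed: read bit of the EPCM permission mask */
	};

	if (ioctl(encl_fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS, &params)) {
		fprintf(stderr, "EMODPR failed: result=%llu count=%llu\n",
			(unsigned long long)params.result,
			(unsigned long long)params.count);
		return -1;
	}
	return 0;
}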
+
+/**
+ * struct sgx_enclave_modify_types - parameters for ioctl
+ * %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @page_type: new type for pages in range described by @offset and @length
+ * @result: (output) SGX result code of ENCLS[EMODT] function
+ * @count: (output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_modify_types {
+ __u64 offset;
+ __u64 length;
+ __u64 page_type;
+ __u64 result;
+ __u64 count;
+};
+
+/**
+ * struct sgx_enclave_remove_pages - %SGX_IOC_ENCLAVE_REMOVE_PAGES parameters
+ * @offset: starting page offset (page aligned relative to enclave base
+ * address defined in SECS)
+ * @length: length of memory (multiple of the page size)
+ * @count: (output) bytes successfully changed (multiple of page size)
+ *
+ * Regular (PT_REG) or TCS (PT_TCS) pages can be removed from an initialized
+ * enclave if the system supports SGX2. First, the %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * ioctl() should be used to change the page type to PT_TRIM. After that
+ * succeeds, ENCLU[EACCEPT] should be run from within the enclave and then
+ * %SGX_IOC_ENCLAVE_REMOVE_PAGES can be used to complete the page removal.
+ */
+struct sgx_enclave_remove_pages {
+ __u64 offset;
+ __u64 length;
+ __u64 count;
+};
+
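
A hedged sketch of the host-side half of the removal flow described above. The ENCLU[EACCEPT] step runs inside the enclave and is only noted as a comment; the enclave fd and the numeric PT_TRIM encoding used here are assumptions of this example:

/*
 * Hypothetical removal sequence: EMODT to PT_TRIM, enclave-side EACCEPT,
 * then SGX_IOC_ENCLAVE_REMOVE_PAGES.
 */
#include <sys/ioctl.h>
#include <asm/sgx.h>

#define EXAMPLE_PAGE_TYPE_TRIM	4	/* assumed EPCM encoding of PT_TRIM */

static int remove_enclave_pages(int encl_fd, __u64 offset, __u64 length)
{
	struct sgx_enclave_modify_types trim = {
		.offset = offset,
		.length = length,
		.page_type = EXAMPLE_PAGE_TYPE_TRIM,
	};
	struct sgx_enclave_remove_pages remove = {
		.offset = offset,
		.length = length,
	};

	/* Step 1: ask the kernel to run ENCLS[EMODT] and mark the pages PT_TRIM. */
	if (ioctl(encl_fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &trim))
		return -1;

	/* Step 2: the enclave must now ENCLU[EACCEPT] each trimmed page (not shown). */

	/* Step 3: complete the removal once the enclave has accepted the change. */
	if (ioctl(encl_fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove))
		return -1;

	return 0;
}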
struct sgx_enclave_run;
/**
diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index fce18eaa070c..13775bfdfee2 100644
--- a/arch/x86/include/uapi/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
@@ -5,6 +5,10 @@
#if !defined(__x86_64__) || !defined(__ILP32__)
#include <asm-generic/shmbuf.h>
#else
+
+#include <asm/ipcbuf.h>
+#include <asm/posix_types.h>
+
/*
* The shmid64_ds structure for x86 architecture with x32 ABI.
*
@@ -15,7 +19,7 @@
struct shmid64_ds {
struct ipc64_perm shm_perm; /* operation perms */
- size_t shm_segsz; /* size of segment (bytes) */
+ __kernel_size_t shm_segsz; /* size of segment (bytes) */
__kernel_long_t shm_atime; /* last attach time */
__kernel_long_t shm_dtime; /* last detach time */
__kernel_long_t shm_ctime; /* last change time */
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index 164a22a72984..777c3a0f4e23 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -104,7 +104,7 @@ struct sigaction {
typedef struct sigaltstack {
void __user *ss_sp;
int ss_flags;
- size_t ss_size;
+ __kernel_size_t ss_size;
} stack_t;
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index efa969325ede..f69c168391aa 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -108,6 +108,14 @@
#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005
#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
+#define SVM_VMGEXIT_PSC 0x80000010
+#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011
+#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012
+#define SVM_VMGEXIT_AP_CREATION 0x80000013
+#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
+#define SVM_VMGEXIT_AP_CREATE 1
+#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
/* Exit code reserved for hypervisor/software use */
@@ -218,6 +226,11 @@
{ SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \
{ SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \
{ SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \
+ { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \
+ { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \
+ { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \
+ { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \
+ { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \
{ SVM_EXIT_ERR, "invalid_guest_state" }
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 946d761adbd3..a5faf6d88f1b 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -91,6 +91,7 @@
#define EXIT_REASON_UMWAIT 67
#define EXIT_REASON_TPAUSE 68
#define EXIT_REASON_BUS_LOCK 74
+#define EXIT_REASON_NOTIFY 75
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -153,7 +154,8 @@
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
- { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }
+ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
+ { EXIT_REASON_NOTIFY, "NOTIFY" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }