aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/sha-mb/sha1_x8_avx2.S13
-rw-r--r--arch/x86/entry/thunk_64.S11
-rw-r--r--arch/x86/entry/vdso/Makefile4
-rw-r--r--arch/x86/entry/vdso/vma.c3
-rw-r--r--arch/x86/ia32/ia32_aout.c29
-rw-r--r--arch/x86/include/asm/bugs.h8
-rw-r--r--arch/x86/include/asm/cpufeature.h12
-rw-r--r--arch/x86/include/asm/disabled-features.h6
-rw-r--r--arch/x86/include/asm/intel_telemetry.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h32
-rw-r--r--arch/x86/include/asm/livepatch.h2
-rw-r--r--arch/x86/include/asm/pgtable.h1
-rw-r--r--arch/x86/include/asm/pmc_core.h27
-rw-r--r--arch/x86/include/asm/svm.h12
-rw-r--r--arch/x86/include/asm/uaccess.h5
-rw-r--r--arch/x86/include/asm/uaccess_32.h62
-rw-r--r--arch/x86/include/asm/uaccess_64.h7
-rw-r--r--arch/x86/include/uapi/asm/kvm.h6
-rw-r--r--arch/x86/include/uapi/asm/svm.h49
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c1
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/cpu/common.c7
-rw-r--r--arch/x86/kernel/cpu/intel.c37
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c2
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c18
-rw-r--r--arch/x86/kernel/livepatch.c70
-rw-r--r--arch/x86/kernel/machine_kexec_64.c45
-rw-r--r--arch/x86/kernel/mcount_64.S3
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c4
-rw-r--r--arch/x86/kernel/setup.c12
-rw-r--r--arch/x86/kernel/tsc_msr.c3
-rw-r--r--arch/x86/kvm/cpuid.c22
-rw-r--r--arch/x86/kvm/ioapic.c2
-rw-r--r--arch/x86/kvm/iommu.c2
-rw-r--r--arch/x86/kvm/irq_comm.c3
-rw-r--r--arch/x86/kvm/lapic.c193
-rw-r--r--arch/x86/kvm/lapic.h38
-rw-r--r--arch/x86/kvm/mmu.c36
-rw-r--r--arch/x86/kvm/mtrr.c2
-rw-r--r--arch/x86/kvm/svm.c670
-rw-r--r--arch/x86/kvm/trace.h57
-rw-r--r--arch/x86/kvm/vmx.c60
-rw-r--r--arch/x86/kvm/x86.c76
-rw-r--r--arch/x86/mm/fault.c11
-rw-r--r--arch/x86/mm/hugetlbpage.c1
-rw-r--r--arch/x86/mm/numa.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c70
-rw-r--r--arch/x86/pci/acpi.c1
-rw-r--r--arch/x86/pci/common.c2
-rw-r--r--arch/x86/pci/fixup.c7
-rw-r--r--arch/x86/pci/xen.c7
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S9
-rw-r--r--arch/x86/platform/efi/quirks.c2
-rw-r--r--arch/x86/purgatory/Makefile2
-rw-r--r--arch/x86/realmode/rm/Makefile1
-rw-r--r--arch/x86/um/os-Linux/registers.c49
-rw-r--r--arch/x86/um/ptrace_32.c5
-rw-r--r--arch/x86/um/ptrace_64.c16
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_64.h4
-rw-r--r--arch/x86/um/signal.c37
-rw-r--r--arch/x86/um/user-offsets.c2
-rw-r--r--arch/x86/um/vdso/vma.c3
-rw-r--r--arch/x86/xen/setup.c65
-rw-r--r--arch/x86/xen/time.c6
71 files changed, 1439 insertions, 537 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7bb15747fea2..0a7b885964ba 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -91,7 +91,7 @@ config X86
select HAVE_ARCH_SOFT_DIRTY if X86_64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
- select HAVE_BPF_JIT if X86_64
+ select HAVE_EBPF_JIT if X86_64
select HAVE_CC_STACKPROTECTOR
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
@@ -105,6 +105,7 @@ config X86
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_EXIT_THREAD
select HAVE_FENTRY if X86_64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_FP_TEST
@@ -130,6 +131,7 @@ config X86
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MIXED_BREAKPOINTS_REGS
+ select HAVE_NMI
select HAVE_OPROFILE
select HAVE_OPTPROBES
select HAVE_PCSPKR_PLATFORM
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index cfdd8c3f8af2..f1356889204e 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -87,7 +87,6 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,ld)
- @:
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 265901a84f3f..5fa6ee2c2dde 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -17,7 +17,6 @@ CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_SCHED=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 0c8d7963483c..d28bdabcc87e 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -16,7 +16,6 @@ CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_SCHED=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
diff --git a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
index 8e1b47792b31..c9dae1cd2919 100644
--- a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
+++ b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
@@ -296,7 +296,11 @@ W14 = TMP_
#
ENTRY(sha1_x8_avx2)
- push RSP_SAVE
+ # save callee-saved clobbered registers to comply with C function ABI
+ push %r12
+ push %r13
+ push %r14
+ push %r15
#save rsp
mov %rsp, RSP_SAVE
@@ -446,7 +450,12 @@ lloop:
## Postamble
mov RSP_SAVE, %rsp
- pop RSP_SAVE
+
+ # restore callee-saved clobbered registers
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
ret
ENDPROC(sha1_x8_avx2)
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 98df1fa8825c..027aec4a74df 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -8,16 +8,15 @@
#include <linux/linkage.h>
#include "calling.h"
#include <asm/asm.h>
-#include <asm/frame.h>
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func, put_ret_addr_in_rdi=0
.globl \name
.type \name, @function
\name:
- FRAME_BEGIN
+ pushq %rbp
+ movq %rsp, %rbp
- /* this one pushes 9 elems, the next one would be %rIP */
pushq %rdi
pushq %rsi
pushq %rdx
@@ -29,8 +28,8 @@
pushq %r11
.if \put_ret_addr_in_rdi
- /* 9*8(%rsp) is return addr on stack */
- movq 9*8(%rsp), %rdi
+ /* 8(%rbp) is return addr on stack */
+ movq 8(%rbp), %rdi
.endif
call \func
@@ -65,7 +64,7 @@ restore:
popq %rdx
popq %rsi
popq %rdi
- FRAME_END
+ popq %rbp
ret
_ASM_NOKPROBE(restore)
#endif
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 6874da5f67fc..253b72eaade6 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -193,10 +193,10 @@ vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
$(MODLIB)/vdso: FORCE
@mkdir -p $(MODLIB)/vdso
-$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
+$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso
$(call cmd,vdso_install)
PHONY += vdso_install $(vdso_img_insttargets)
-vdso_install: $(vdso_img_insttargets) FORCE
+vdso_install: $(vdso_img_insttargets)
clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index b3cf81333a54..ab220ac9b3b9 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -163,7 +163,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
addr = 0;
}
- down_write(&mm->mmap_sem);
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
addr = get_unmapped_area(NULL, addr,
image->size - image->sym_vvar_start, 0, 0);
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index ae6aad1d24f7..cb26f18d43af 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -116,13 +116,13 @@ static struct linux_binfmt aout_format = {
.min_coredump = PAGE_SIZE
};
-static void set_brk(unsigned long start, unsigned long end)
+static int set_brk(unsigned long start, unsigned long end)
{
start = PAGE_ALIGN(start);
end = PAGE_ALIGN(end);
if (end <= start)
- return;
- vm_brk(start, end - start);
+ return 0;
+ return vm_brk(start, end - start);
}
#ifdef CONFIG_COREDUMP
@@ -321,7 +321,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
error = vm_brk(text_addr & PAGE_MASK, map_size);
- if (error != (text_addr & PAGE_MASK))
+ if (error)
return error;
error = read_code(bprm->file, text_addr, 32,
@@ -349,7 +349,10 @@ static int load_aout_binary(struct linux_binprm *bprm)
#endif
if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
- vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
+ error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
+ if (error)
+ return error;
+
read_code(bprm->file, N_TXTADDR(ex), fd_offset,
ex.a_text+ex.a_data);
goto beyond_if;
@@ -372,10 +375,13 @@ static int load_aout_binary(struct linux_binprm *bprm)
if (error != N_DATADDR(ex))
return error;
}
+
beyond_if:
- set_binfmt(&aout_format);
+ error = set_brk(current->mm->start_brk, current->mm->brk);
+ if (error)
+ return error;
- set_brk(current->mm->start_brk, current->mm->brk);
+ set_binfmt(&aout_format);
current->mm->start_stack =
(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
@@ -434,7 +440,9 @@ static int load_aout_library(struct file *file)
error_time = jiffies;
}
#endif
- vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
+ retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
+ if (retval)
+ goto out;
read_code(file, start_addr, N_TXTOFF(ex),
ex.a_text + ex.a_data);
@@ -453,9 +461,8 @@ static int load_aout_library(struct file *file)
len = PAGE_ALIGN(ex.a_text + ex.a_data);
bss = ex.a_text + ex.a_data + ex.a_bss;
if (bss > len) {
- error = vm_brk(start_addr + len, bss - len);
- retval = error;
- if (error != start_addr + len)
+ retval = vm_brk(start_addr + len, bss - len);
+ if (retval)
goto out;
}
retval = 0;
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 08abf639075f..5490bbaf71d5 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -1,8 +1,16 @@
#ifndef _ASM_X86_BUGS_H
#define _ASM_X86_BUGS_H
+#include <asm/processor.h>
+
extern void check_bugs(void);
+#if defined(CONFIG_CPU_SUP_INTEL)
+void check_mpx_erratum(struct cpuinfo_x86 *c);
+#else
+static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {}
+#endif
+
#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
int ppro_with_ram_bug(void);
#else
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 25ebb54905e0..483fb547e3c0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -64,9 +64,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
(((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) || \
(((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) || \
(((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \
- (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
+ (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \
+ (((bit)>>5)==15 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \
+ (((bit)>>5)==16 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
#define DISABLED_MASK_BIT_SET(bit) \
( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0 )) || \
@@ -83,9 +83,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
(((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) || \
(((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) || \
(((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \
- (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
+ (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \
+ (((bit)>>5)==15 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \
+ (((bit)>>5)==16 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 39343be7d4f4..911e9358ceb1 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -29,11 +29,11 @@
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU (1<<(X86_FEATURE_PKU))
-# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE))
-#else
# define DISABLE_PKU 0
# define DISABLE_OSPKE 0
+#else
+# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
+# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
/*
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index ed65fe701de5..85029b58d0cd 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -99,7 +99,7 @@ struct telemetry_core_ops {
int (*reset_events)(void);
};
-int telemetry_set_pltdata(struct telemetry_core_ops *ops,
+int telemetry_set_pltdata(const struct telemetry_core_ops *ops,
struct telemetry_plt_config *pltconfig);
int telemetry_clear_pltdata(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b7e394485a5f..e0fbe7e70dc1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -562,7 +562,6 @@ struct kvm_vcpu_arch {
struct {
u64 msr_val;
u64 last_steal;
- u64 accum_steal;
struct gfn_to_hva_cache stime;
struct kvm_steal_time steal;
} st;
@@ -774,6 +773,11 @@ struct kvm_arch {
u8 nr_reserved_ioapic_pins;
bool disabled_lapic_found;
+
+ /* Struct members for AVIC */
+ u32 ldr_mode;
+ struct page *avic_logical_id_table_page;
+ struct page *avic_physical_id_table_page;
};
struct kvm_vm_stat {
@@ -804,6 +808,7 @@ struct kvm_vcpu_stat {
u32 halt_exits;
u32 halt_successful_poll;
u32 halt_attempted_poll;
+ u32 halt_poll_invalid;
u32 halt_wakeup;
u32 request_irq_exits;
u32 irq_exits;
@@ -848,6 +853,9 @@ struct kvm_x86_ops {
bool (*cpu_has_high_real_mode_segbase)(void);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
+ int (*vm_init)(struct kvm *kvm);
+ void (*vm_destroy)(struct kvm *kvm);
+
/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
@@ -914,7 +922,7 @@ struct kvm_x86_ops {
bool (*get_enable_apicv)(void);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
- void (*hwapic_isr_update)(struct kvm *kvm, int isr);
+ void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
@@ -990,8 +998,13 @@ struct kvm_x86_ops {
*/
int (*pre_block)(struct kvm_vcpu *vcpu);
void (*post_block)(struct kvm_vcpu *vcpu);
+
+ void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
+ void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
+
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
+ void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
@@ -1341,7 +1354,18 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq);
-static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops->vcpu_blocking)
+ kvm_x86_ops->vcpu_blocking(vcpu);
+}
+
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops->vcpu_unblocking)
+ kvm_x86_ops->vcpu_unblocking(vcpu);
+}
+
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
index 7e68f9558552..a7f9181f63f3 100644
--- a/arch/x86/include/asm/livepatch.h
+++ b/arch/x86/include/asm/livepatch.h
@@ -32,8 +32,6 @@ static inline int klp_check_compiler_support(void)
#endif
return 0;
}
-int klp_write_module_reloc(struct module *mod, unsigned long type,
- unsigned long loc, unsigned long value);
static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
{
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f86491a7bc9d..1a27396b6ea0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -181,6 +181,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
}
+#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
return boot_cpu_has(X86_FEATURE_PSE);
diff --git a/arch/x86/include/asm/pmc_core.h b/arch/x86/include/asm/pmc_core.h
new file mode 100644
index 000000000000..d4855f11136d
--- /dev/null
+++ b/arch/x86/include/asm/pmc_core.h
@@ -0,0 +1,27 @@
+/*
+ * Intel Core SoC Power Management Controller Header File
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ * All Rights Reserved.
+ *
+ * Authors: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
+ * Vishwanath Somayaji <vishwanath.somayaji@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_PMC_CORE_H
+#define _ASM_PMC_CORE_H
+
+/* API to read SLP_S0_RESIDENCY counter */
+int intel_pmc_slp_s0_counter_read(u32 *data);
+
+#endif /* _ASM_PMC_CORE_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 6136d99f537b..d0fe23ec7e98 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -78,7 +78,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 exit_int_info;
u32 exit_int_info_err;
u64 nested_ctl;
- u8 reserved_4[16];
+ u64 avic_vapic_bar;
+ u8 reserved_4[8];
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
@@ -88,7 +89,11 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u64 next_rip;
u8 insn_len;
u8 insn_bytes[15];
- u8 reserved_6[800];
+ u64 avic_backing_page; /* Offset 0xe0 */
+ u8 reserved_6[8]; /* Offset 0xe8 */
+ u64 avic_logical_id; /* Offset 0xf0 */
+ u64 avic_physical_id; /* Offset 0xf8 */
+ u8 reserved_7[768];
};
@@ -111,6 +116,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+
#define SVM_INTERRUPT_SHADOW_MASK 1
#define SVM_IOIO_STR_SHIFT 2
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 12f9653bde8d..2982387ba817 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -5,6 +5,7 @@
*/
#include <linux/errno.h>
#include <linux/compiler.h>
+#include <linux/kasan-checks.h>
#include <linux/thread_info.h>
#include <linux/string.h>
#include <asm/asm.h>
@@ -721,6 +722,8 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
might_fault();
+ kasan_check_write(to, n);
+
/*
* While we would like to have the compiler do the checking for us
* even in the non-constant size case, any false positives there are
@@ -754,6 +757,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
{
int sz = __compiletime_object_size(from);
+ kasan_check_read(from, n);
+
might_fault();
/* See the comment in copy_from_user() above. */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 3fe0eac59462..4b32da24faaf 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -33,46 +33,10 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
* the specified block with access_ok() before calling this function.
* The caller should also make sure he pins the user space address
* so that we don't result in page fault and sleep.
- *
- * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
- * we return the initial request size (1, 2 or 4), as copy_*_user should do.
- * If a store crosses a page boundary and gets a fault, the x86 will not write
- * anything, so this is accurate.
*/
-
static __always_inline unsigned long __must_check
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
{
- if (__builtin_constant_p(n)) {
- unsigned long ret;
-
- switch (n) {
- case 1:
- __uaccess_begin();
- __put_user_size(*(u8 *)from, (u8 __user *)to,
- 1, ret, 1);
- __uaccess_end();
- return ret;
- case 2:
- __uaccess_begin();
- __put_user_size(*(u16 *)from, (u16 __user *)to,
- 2, ret, 2);
- __uaccess_end();
- return ret;
- case 4:
- __uaccess_begin();
- __put_user_size(*(u32 *)from, (u32 __user *)to,
- 4, ret, 4);
- __uaccess_end();
- return ret;
- case 8:
- __uaccess_begin();
- __put_user_size(*(u64 *)from, (u64 __user *)to,
- 8, ret, 8);
- __uaccess_end();
- return ret;
- }
- }
return __copy_to_user_ll(to, from, n);
}
@@ -101,32 +65,6 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long
__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
{
- /* Avoid zeroing the tail if the copy fails..
- * If 'n' is constant and 1, 2, or 4, we do still zero on a failure,
- * but as the zeroing behaviour is only significant when n is not
- * constant, that shouldn't be a problem.
- */
- if (__builtin_constant_p(n)) {
- unsigned long ret;
-
- switch (n) {
- case 1:
- __uaccess_begin();
- __get_user_size(*(u8 *)to, from, 1, ret, 1);
- __uaccess_end();
- return ret;
- case 2:
- __uaccess_begin();
- __get_user_size(*(u16 *)to, from, 2, ret, 2);
- __uaccess_end();
- return ret;
- case 4:
- __uaccess_begin();
- __get_user_size(*(u32 *)to, from, 4, ret, 4);
- __uaccess_end();
- return ret;
- }
- }
return __copy_from_user_ll_nozero(to, from, n);
}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 307698688fa1..2eac2aa3e37f 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -7,6 +7,7 @@
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/lockdep.h>
+#include <linux/kasan-checks.h>
#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
@@ -109,6 +110,7 @@ static __always_inline __must_check
int __copy_from_user(void *dst, const void __user *src, unsigned size)
{
might_fault();
+ kasan_check_write(dst, size);
return __copy_from_user_nocheck(dst, src, size);
}
@@ -175,6 +177,7 @@ static __always_inline __must_check
int __copy_to_user(void __user *dst, const void *src, unsigned size)
{
might_fault();
+ kasan_check_read(src, size);
return __copy_to_user_nocheck(dst, src, size);
}
@@ -242,12 +245,14 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
static __must_check __always_inline int
__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)
{
+ kasan_check_write(dst, size);
return __copy_from_user_nocheck(dst, src, size);
}
static __must_check __always_inline int
__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
{
+ kasan_check_read(src, size);
return __copy_to_user_nocheck(dst, src, size);
}
@@ -258,6 +263,7 @@ static inline int
__copy_from_user_nocache(void *dst, const void __user *src, unsigned size)
{
might_fault();
+ kasan_check_write(dst, size);
return __copy_user_nocache(dst, src, size, 1);
}
@@ -265,6 +271,7 @@ static inline int
__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
unsigned size)
{
+ kasan_check_write(dst, size);
return __copy_user_nocache(dst, src, size, 0);
}
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index cd54147cb365..739c0c594022 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -216,9 +216,9 @@ struct kvm_cpuid_entry2 {
__u32 padding[3];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX (1 << 0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC (1 << 1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT (1 << 2)
/* for KVM_SET_CPUID2 */
struct kvm_cpuid2 {
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 8a4add8e4639..3725e145aa58 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -2,10 +2,12 @@
#define _UAPI__SVM_H
#define SVM_EXIT_READ_CR0 0x000
+#define SVM_EXIT_READ_CR2 0x002
#define SVM_EXIT_READ_CR3 0x003
#define SVM_EXIT_READ_CR4 0x004
#define SVM_EXIT_READ_CR8 0x008
#define SVM_EXIT_WRITE_CR0 0x010
+#define SVM_EXIT_WRITE_CR2 0x012
#define SVM_EXIT_WRITE_CR3 0x013
#define SVM_EXIT_WRITE_CR4 0x014
#define SVM_EXIT_WRITE_CR8 0x018
@@ -73,15 +75,19 @@
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_XSETBV 0x08d
#define SVM_EXIT_NPF 0x400
+#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
+#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
#define SVM_EXIT_ERR -1
#define SVM_EXIT_REASONS \
{ SVM_EXIT_READ_CR0, "read_cr0" }, \
+ { SVM_EXIT_READ_CR2, "read_cr2" }, \
{ SVM_EXIT_READ_CR3, "read_cr3" }, \
{ SVM_EXIT_READ_CR4, "read_cr4" }, \
{ SVM_EXIT_READ_CR8, "read_cr8" }, \
{ SVM_EXIT_WRITE_CR0, "write_cr0" }, \
+ { SVM_EXIT_WRITE_CR2, "write_cr2" }, \
{ SVM_EXIT_WRITE_CR3, "write_cr3" }, \
{ SVM_EXIT_WRITE_CR4, "write_cr4" }, \
{ SVM_EXIT_WRITE_CR8, "write_cr8" }, \
@@ -89,32 +95,66 @@
{ SVM_EXIT_READ_DR1, "read_dr1" }, \
{ SVM_EXIT_READ_DR2, "read_dr2" }, \
{ SVM_EXIT_READ_DR3, "read_dr3" }, \
+ { SVM_EXIT_READ_DR4, "read_dr4" }, \
+ { SVM_EXIT_READ_DR5, "read_dr5" }, \
+ { SVM_EXIT_READ_DR6, "read_dr6" }, \
+ { SVM_EXIT_READ_DR7, "read_dr7" }, \
{ SVM_EXIT_WRITE_DR0, "write_dr0" }, \
{ SVM_EXIT_WRITE_DR1, "write_dr1" }, \
{ SVM_EXIT_WRITE_DR2, "write_dr2" }, \
{ SVM_EXIT_WRITE_DR3, "write_dr3" }, \
+ { SVM_EXIT_WRITE_DR4, "write_dr4" }, \
{ SVM_EXIT_WRITE_DR5, "write_dr5" }, \
+ { SVM_EXIT_WRITE_DR6, "write_dr6" }, \
{ SVM_EXIT_WRITE_DR7, "write_dr7" }, \
+ { SVM_EXIT_EXCP_BASE + DE_VECTOR, "DE excp" }, \
{ SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
{ SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
+ { SVM_EXIT_EXCP_BASE + OF_VECTOR, "OF excp" }, \
+ { SVM_EXIT_EXCP_BASE + BR_VECTOR, "BR excp" }, \
{ SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
- { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
{ SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
+ { SVM_EXIT_EXCP_BASE + DF_VECTOR, "DF excp" }, \
+ { SVM_EXIT_EXCP_BASE + TS_VECTOR, "TS excp" }, \
+ { SVM_EXIT_EXCP_BASE + NP_VECTOR, "NP excp" }, \
+ { SVM_EXIT_EXCP_BASE + SS_VECTOR, "SS excp" }, \
+ { SVM_EXIT_EXCP_BASE + GP_VECTOR, "GP excp" }, \
+ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
+ { SVM_EXIT_EXCP_BASE + MF_VECTOR, "MF excp" }, \
{ SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \
{ SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
+ { SVM_EXIT_EXCP_BASE + XM_VECTOR, "XF excp" }, \
{ SVM_EXIT_INTR, "interrupt" }, \
{ SVM_EXIT_NMI, "nmi" }, \
{ SVM_EXIT_SMI, "smi" }, \
{ SVM_EXIT_INIT, "init" }, \
{ SVM_EXIT_VINTR, "vintr" }, \
+ { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
+ { SVM_EXIT_IDTR_READ, "read_idtr" }, \
+ { SVM_EXIT_GDTR_READ, "read_gdtr" }, \
+ { SVM_EXIT_LDTR_READ, "read_ldtr" }, \
+ { SVM_EXIT_TR_READ, "read_rt" }, \
+ { SVM_EXIT_IDTR_WRITE, "write_idtr" }, \
+ { SVM_EXIT_GDTR_WRITE, "write_gdtr" }, \
+ { SVM_EXIT_LDTR_WRITE, "write_ldtr" }, \
+ { SVM_EXIT_TR_WRITE, "write_rt" }, \
+ { SVM_EXIT_RDTSC, "rdtsc" }, \
+ { SVM_EXIT_RDPMC, "rdpmc" }, \
+ { SVM_EXIT_PUSHF, "pushf" }, \
+ { SVM_EXIT_POPF, "popf" }, \
{ SVM_EXIT_CPUID, "cpuid" }, \
+ { SVM_EXIT_RSM, "rsm" }, \
+ { SVM_EXIT_IRET, "iret" }, \
+ { SVM_EXIT_SWINT, "swint" }, \
{ SVM_EXIT_INVD, "invd" }, \
+ { SVM_EXIT_PAUSE, "pause" }, \
{ SVM_EXIT_HLT, "hlt" }, \
{ SVM_EXIT_INVLPG, "invlpg" }, \
{ SVM_EXIT_INVLPGA, "invlpga" }, \
{ SVM_EXIT_IOIO, "io" }, \
{ SVM_EXIT_MSR, "msr" }, \
{ SVM_EXIT_TASK_SWITCH, "task_switch" }, \
+ { SVM_EXIT_FERR_FREEZE, "ferr_freeze" }, \
{ SVM_EXIT_SHUTDOWN, "shutdown" }, \
{ SVM_EXIT_VMRUN, "vmrun" }, \
{ SVM_EXIT_VMMCALL, "hypercall" }, \
@@ -123,11 +163,16 @@
{ SVM_EXIT_STGI, "stgi" }, \
{ SVM_EXIT_CLGI, "clgi" }, \
{ SVM_EXIT_SKINIT, "skinit" }, \
+ { SVM_EXIT_RDTSCP, "rdtscp" }, \
+ { SVM_EXIT_ICEBP, "icebp" }, \
{ SVM_EXIT_WBINVD, "wbinvd" }, \
{ SVM_EXIT_MONITOR, "monitor" }, \
{ SVM_EXIT_MWAIT, "mwait" }, \
{ SVM_EXIT_XSETBV, "xsetbv" }, \
- { SVM_EXIT_NPF, "npf" }
+ { SVM_EXIT_NPF, "npf" }, \
+ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
+ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
+ { SVM_EXIT_ERR, "invalid_guest_state" }
#endif /* _UAPI__SVM_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9abf8551c7e4..0503f5bfb18d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -83,7 +83,6 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_LIVEPATCH) += livepatch.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_X86_TSC) += trace_clock.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f115a58f7c84..9414f84584e4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -445,7 +445,6 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
- acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
* stash over-ride to indicate we've been here
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 045e424fb368..7788ce643bf4 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -18,7 +18,6 @@
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/delay.h>
-#include <linux/seq_buf.h>
#ifdef CONFIG_HARDLOCKUP_DETECTOR
u64 hw_nmi_get_sample_period(int watchdog_thresh)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6ef6ed9ccca6..0fe6953f421c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,7 @@
#include <asm/mtrr.h>
#include <linux/numa.h>
#include <asm/asm.h>
+#include <asm/bugs.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -270,6 +271,8 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
static __init int setup_disable_smep(char *arg)
{
setup_clear_cpu_cap(X86_FEATURE_SMEP);
+ /* Check for things that depend on SMEP being enabled: */
+ check_mpx_erratum(&boot_cpu_data);
return 1;
}
__setup("nosmep", setup_disable_smep);
@@ -310,6 +313,10 @@ static bool pku_disabled;
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
{
+ /* check the boot processor, plus compile options for PKU: */
+ if (!cpu_feature_enabled(X86_FEATURE_PKU))
+ return;
+ /* checks the actual processor's cpuid bits: */
if (!cpu_has(c, X86_FEATURE_PKU))
return;
if (pku_disabled)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8dae51fd3db1..6e2ffbebbcdb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -25,6 +25,41 @@
#include <asm/apic.h>
#endif
+/*
+ * Just in case our CPU detection goes bad, or you have a weird system,
+ * allow a way to override the automatic disabling of MPX.
+ */
+static int forcempx;
+
+static int __init forcempx_setup(char *__unused)
+{
+ forcempx = 1;
+
+ return 1;
+}
+__setup("intel-skd-046-workaround=disable", forcempx_setup);
+
+void check_mpx_erratum(struct cpuinfo_x86 *c)
+{
+ if (forcempx)
+ return;
+ /*
+ * Turn off the MPX feature on CPUs where SMEP is not
+ * available or disabled.
+ *
+ * Works around Intel Erratum SKD046: "Branch Instructions
+ * May Initialize MPX Bound Registers Incorrectly".
+ *
+ * This might falsely disable MPX on systems without
+ * SMEP, like Atom processors without SMEP. But there
+ * is no such hardware known at the moment.
+ */
+ if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) {
+ setup_clear_cpu_cap(X86_FEATURE_MPX);
+ pr_warn("x86/mpx: Disabling MPX since SMEP not present\n");
+ }
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -173,6 +208,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (edx & (1U << 28))
c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
}
+
+ check_mpx_erratum(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index cbb3cf09b065..65cbbcd48fe4 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -422,7 +422,7 @@ static void show_saved_mc(void)
data_size = get_datasize(mc_saved_header);
date = mc_saved_header->date;
- pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
+ pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n",
i, sig, pf, rev, total_size,
date & 0xffff,
date >> 24,
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 2af478e3fd4e..f2356bda2b05 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -19,8 +19,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/efi.h>
-#include <linux/verify_pefile.h>
-#include <keys/system_keyring.h>
+#include <linux/verification.h>
#include <asm/bootparam.h>
#include <asm/setup.h>
@@ -529,18 +528,9 @@ static int bzImage64_cleanup(void *loader_data)
#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
{
- bool trusted;
- int ret;
-
- ret = verify_pefile_signature(kernel, kernel_len,
- system_trusted_keyring,
- VERIFYING_KEXEC_PE_SIGNATURE,
- &trusted);
- if (ret < 0)
- return ret;
- if (!trusted)
- return -EKEYREJECTED;
- return 0;
+ return verify_pefile_signature(kernel, kernel_len,
+ NULL,
+ VERIFYING_KEXEC_PE_SIGNATURE);
}
#endif
diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c
deleted file mode 100644
index 92fc1a51f994..000000000000
--- a/arch/x86/kernel/livepatch.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * livepatch.c - x86-specific Kernel Live Patching Core
- *
- * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
- * Copyright (C) 2014 SUSE
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/module.h>
-#include <linux/uaccess.h>
-#include <asm/elf.h>
-#include <asm/livepatch.h>
-
-/**
- * klp_write_module_reloc() - write a relocation in a module
- * @mod: module in which the section to be modified is found
- * @type: ELF relocation type (see asm/elf.h)
- * @loc: address that the relocation should be written to
- * @value: relocation value (sym address + addend)
- *
- * This function writes a relocation to the specified location for
- * a particular module.
- */
-int klp_write_module_reloc(struct module *mod, unsigned long type,
- unsigned long loc, unsigned long value)
-{
- size_t size = 4;
- unsigned long val;
- unsigned long core = (unsigned long)mod->core_layout.base;
- unsigned long core_size = mod->core_layout.size;
-
- switch (type) {
- case R_X86_64_NONE:
- return 0;
- case R_X86_64_64:
- val = value;
- size = 8;
- break;
- case R_X86_64_32:
- val = (u32)value;
- break;
- case R_X86_64_32S:
- val = (s32)value;
- break;
- case R_X86_64_PC32:
- val = (u32)(value - loc);
- break;
- default:
- /* unsupported relocation type */
- return -EINVAL;
- }
-
- if (loc < core || loc >= core + core_size)
- /* loc does not point to any symbol inside the module */
- return -EINVAL;
-
- return probe_kernel_write((void *)loc, &val, size);
-}
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index ba7fbba9831b..5a294e48b185 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -538,3 +538,48 @@ overflow:
return -ENOEXEC;
}
#endif /* CONFIG_KEXEC_FILE */
+
+static int
+kexec_mark_range(unsigned long start, unsigned long end, bool protect)
+{
+ struct page *page;
+ unsigned int nr_pages;
+
+ /*
+ * For physical range: [start, end]. We must skip the unassigned
+ * crashk resource with zero-valued "end" member.
+ */
+ if (!end || start > end)
+ return 0;
+
+ page = pfn_to_page(start >> PAGE_SHIFT);
+ nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
+ if (protect)
+ return set_pages_ro(page, nr_pages);
+ else
+ return set_pages_rw(page, nr_pages);
+}
+
+static void kexec_mark_crashkres(bool protect)
+{
+ unsigned long control;
+
+ kexec_mark_range(crashk_low_res.start, crashk_low_res.end, protect);
+
+ /* Don't touch the control code page used in crash_kexec().*/
+ control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page));
+ /* Control code page is located in the 2nd page. */
+ kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect);
+ control += KEXEC_CONTROL_PAGE_SIZE;
+ kexec_mark_range(control, crashk_res.end, protect);
+}
+
+void arch_kexec_protect_crashkres(void)
+{
+ kexec_mark_crashkres(true);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+ kexec_mark_crashkres(false);
+}
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index ed48a9f465f8..61924222a9e1 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -182,7 +182,8 @@ GLOBAL(ftrace_graph_call)
jmp ftrace_stub
#endif
-GLOBAL(ftrace_stub)
+/* This is weak to keep gas from relaxing the jumps */
+WEAK(ftrace_stub)
retq
END(ftrace_caller)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 2915d54e9dd5..96becbbb52e0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -97,10 +97,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
/*
* Free current thread data structures etc..
*/
-void exit_thread(void)
+void exit_thread(struct task_struct *tsk)
{
- struct task_struct *me = current;
- struct thread_struct *t = &me->thread;
+ struct thread_struct *t = &tsk->thread;
unsigned long *bp = t->io_bitmap_ptr;
struct fpu *fpu = &t->fpu;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6b16c36f0939..6e789ca1f841 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -532,7 +532,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
switch (code) {
case ARCH_SET_GS:
- if (addr >= TASK_SIZE_OF(task))
+ if (addr >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.gsindex = 0;
@@ -546,7 +546,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
with gs */
- if (addr >= TASK_SIZE_OF(task))
+ if (addr >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.fsindex = 0;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e60ef918f53d..600edd225e81 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -392,7 +392,7 @@ static int putreg(struct task_struct *child,
#ifdef CONFIG_X86_64
case offsetof(struct user_regs_struct,fs_base):
- if (value >= TASK_SIZE_OF(child))
+ if (value >= TASK_SIZE_MAX)
return -EIO;
/*
* When changing the segment base, use do_arch_prctl
@@ -406,7 +406,7 @@ static int putreg(struct task_struct *child,
/*
* Exactly the same here as the %fs handling above.
*/
- if (value >= TASK_SIZE_OF(child))
+ if (value >= TASK_SIZE_MAX)
return -EIO;
if (child->thread.gsbase != value)
return do_arch_prctl(child, ARCH_SET_GS, value);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2367ae07eb76..c4e7b3991b60 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -398,6 +398,11 @@ static void __init reserve_initrd(void)
memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
}
+
+static void __init early_initrd_acpi_init(void)
+{
+ early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start);
+}
#else
static void __init early_reserve_initrd(void)
{
@@ -405,6 +410,9 @@ static void __init early_reserve_initrd(void)
static void __init reserve_initrd(void)
{
}
+static void __init early_initrd_acpi_init(void)
+{
+}
#endif /* CONFIG_BLK_DEV_INITRD */
static void __init parse_setup_data(void)
@@ -1138,9 +1146,7 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
-#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD)
- acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
-#endif
+ early_initrd_acpi_init();
vsmp_init();
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 6aa0f4d9eea6..9911a0620f9a 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -23,6 +23,7 @@
#include <asm/param.h>
/* CPU reference clock frequency: in KHz */
+#define FREQ_80 80000
#define FREQ_83 83200
#define FREQ_100 99840
#define FREQ_133 133200
@@ -56,6 +57,8 @@ static struct freq_desc freq_desc_tables[] = {
{ 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } },
/* ANN */
{ 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } },
+ /* AIRMONT */
+ { 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80, 0, 0, 0 } },
};
static int match_cpu(u8 family, u8 model)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 769af907f824..7597b42a8a88 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -181,19 +181,22 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry __user *entries)
{
int r, i;
- struct kvm_cpuid_entry *cpuid_entries;
+ struct kvm_cpuid_entry *cpuid_entries = NULL;
r = -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
goto out;
r = -ENOMEM;
- cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
- if (!cpuid_entries)
- goto out;
- r = -EFAULT;
- if (copy_from_user(cpuid_entries, entries,
- cpuid->nent * sizeof(struct kvm_cpuid_entry)))
- goto out_free;
+ if (cpuid->nent) {
+ cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) *
+ cpuid->nent);
+ if (!cpuid_entries)
+ goto out;
+ r = -EFAULT;
+ if (copy_from_user(cpuid_entries, entries,
+ cpuid->nent * sizeof(struct kvm_cpuid_entry)))
+ goto out;
+ }
for (i = 0; i < cpuid->nent; i++) {
vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
@@ -212,9 +215,8 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
kvm_x86_ops->cpuid_update(vcpu);
r = kvm_update_cpuid(vcpu);
-out_free:
- vfree(cpuid_entries);
out:
+ vfree(cpuid_entries);
return r;
}
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 9db47090ead0..5f42d038fcb4 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -443,7 +443,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
spin_lock(&ioapic->lock);
if (trigger_mode != IOAPIC_LEVEL_TRIG ||
- kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
+ kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
continue;
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
index a22a488b4622..3069281904d3 100644
--- a/arch/x86/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -254,7 +254,7 @@ int kvm_iommu_map_guest(struct kvm *kvm)
!iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
printk(KERN_WARNING "%s: No interrupt remapping support,"
" disallowing device assignment."
- " Re-enble with \"allow_unsafe_assigned_interrupts=1\""
+ " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
" module option.\n", __func__);
iommu_domain_free(kvm->arch.iommu_domain);
kvm->arch.iommu_domain = NULL;
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 54ead79e444b..dfb4c6476877 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -382,9 +382,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
u32 i, nr_ioapic_pins;
int idx;
- /* kvm->irq_routing must be read after clearing
- * KVM_SCAN_IOAPIC. */
- smp_mb();
idx = srcu_read_lock(&kvm->irq_srcu);
table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1a2da0e5a373..bbb5b283ff63 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -59,9 +59,8 @@
/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)
-#define APIC_LVT_NUM 6
/* 14 is the version for Xeon and Pentium 8.4.8*/
-#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16))
+#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH (1 << 12)
/* followed define is not in apicdef.h */
#define APIC_SHORT_MASK 0xc0000
@@ -73,14 +72,6 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
-#define VEC_POS(v) ((v) & (32 - 1))
-#define REG_POS(v) (((v) >> 5) << 4)
-
-static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
-{
- *((u32 *) (apic->regs + reg_off)) = val;
-}
-
static inline int apic_test_vector(int vec, void *bitmap)
{
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -94,11 +85,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
apic_test_vector(vector, apic->regs + APIC_IRR);
}
-static inline void apic_set_vector(int vec, void *bitmap)
-{
- set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
-}
-
static inline void apic_clear_vector(int vec, void *bitmap)
{
clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -173,7 +159,7 @@ static void recalculate_apic_map(struct kvm *kvm)
continue;
aid = kvm_apic_id(apic);
- ldr = kvm_apic_get_reg(apic, APIC_LDR);
+ ldr = kvm_lapic_get_reg(apic, APIC_LDR);
if (aid < ARRAY_SIZE(new->phys_map))
new->phys_map[aid] = apic;
@@ -182,7 +168,7 @@ static void recalculate_apic_map(struct kvm *kvm)
new->mode |= KVM_APIC_MODE_X2APIC;
} else if (ldr) {
ldr = GET_APIC_LOGICAL_ID(ldr);
- if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+ if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
else
new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
@@ -212,7 +198,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
bool enabled = val & APIC_SPIV_APIC_ENABLED;
- apic_set_reg(apic, APIC_SPIV, val);
+ kvm_lapic_set_reg(apic, APIC_SPIV, val);
if (enabled != apic->sw_enabled) {
apic->sw_enabled = enabled;
@@ -226,13 +212,13 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
- apic_set_reg(apic, APIC_ID, id << 24);
+ kvm_lapic_set_reg(apic, APIC_ID, id << 24);
recalculate_apic_map(apic->vcpu->kvm);
}
static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
- apic_set_reg(apic, APIC_LDR, id);
+ kvm_lapic_set_reg(apic, APIC_LDR, id);
recalculate_apic_map(apic->vcpu->kvm);
}
@@ -240,19 +226,19 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
{
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
- apic_set_reg(apic, APIC_ID, id << 24);
- apic_set_reg(apic, APIC_LDR, ldr);
+ kvm_lapic_set_reg(apic, APIC_ID, id << 24);
+ kvm_lapic_set_reg(apic, APIC_LDR, ldr);
recalculate_apic_map(apic->vcpu->kvm);
}
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
- return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
+ return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}
static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
- return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
+ return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
@@ -287,10 +273,10 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
v |= APIC_LVR_DIRECTED_EOI;
- apic_set_reg(apic, APIC_LVR, v);
+ kvm_lapic_set_reg(apic, APIC_LVR, v);
}
-static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
+static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
LVT_MASK | APIC_MODE_MASK, /* LVTPC */
@@ -349,16 +335,6 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
-static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
-{
- apic_set_vector(vec, apic->regs + APIC_IRR);
- /*
- * irr_pending must be true if any interrupt is pending; set it after
- * APIC_IRR to avoid race with apic_clear_irr
- */
- apic->irr_pending = true;
-}
-
static inline int apic_search_irr(struct kvm_lapic *apic)
{
return find_highest_vector(apic->regs + APIC_IRR);
@@ -416,7 +392,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
* just set SVI.
*/
if (unlikely(vcpu->arch.apicv_active))
- kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
+ kvm_x86_ops->hwapic_isr_update(vcpu, vec);
else {
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -464,7 +440,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
* and must be left alone.
*/
if (unlikely(vcpu->arch.apicv_active))
- kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+ kvm_x86_ops->hwapic_isr_update(vcpu,
apic_find_highest_isr(apic));
else {
--apic->isr_count;
@@ -549,8 +525,8 @@ static void apic_update_ppr(struct kvm_lapic *apic)
u32 tpr, isrv, ppr, old_ppr;
int isr;
- old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
- tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
+ old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
+ tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
isr = apic_find_highest_isr(apic);
isrv = (isr != -1) ? isr : 0;
@@ -563,7 +539,7 @@ static void apic_update_ppr(struct kvm_lapic *apic)
apic, ppr, isr, isrv);
if (old_ppr != ppr) {
- apic_set_reg(apic, APIC_PROCPRI, ppr);
+ kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
if (ppr < old_ppr)
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
@@ -571,7 +547,7 @@ static void apic_update_ppr(struct kvm_lapic *apic)
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
- apic_set_reg(apic, APIC_TASKPRI, tpr);
+ kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
apic_update_ppr(apic);
}
@@ -601,7 +577,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
if (kvm_apic_broadcast(apic, mda))
return true;
- logical_id = kvm_apic_get_reg(apic, APIC_LDR);
+ logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
if (apic_x2apic_mode(apic))
return ((logical_id >> 16) == (mda >> 16))
@@ -610,7 +586,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
logical_id = GET_APIC_LOGICAL_ID(logical_id);
mda = GET_APIC_DEST_FIELD(mda);
- switch (kvm_apic_get_reg(apic, APIC_DFR)) {
+ switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
case APIC_DFR_FLAT:
return (logical_id & mda) != 0;
case APIC_DFR_CLUSTER:
@@ -618,7 +594,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
&& (logical_id & mda & 0xf) != 0;
default:
apic_debug("Bad DFR vcpu %d: %08x\n",
- apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
+ apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
return false;
}
}
@@ -668,6 +644,7 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
return false;
}
}
+EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
const unsigned long *bitmap, u32 bitmap_size)
@@ -921,7 +898,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
if (trig_mode)
- apic_set_vector(vector, apic->regs + APIC_TMR);
+ kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
else
apic_clear_vector(vector, apic->regs + APIC_TMR);
}
@@ -929,7 +906,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
if (vcpu->arch.apicv_active)
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
else {
- apic_set_irr(vector, apic);
+ kvm_lapic_set_irr(vector, apic);
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
@@ -1073,8 +1050,8 @@ EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
static void apic_send_ipi(struct kvm_lapic *apic)
{
- u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
- u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
+ u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
+ u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
struct kvm_lapic_irq irq;
irq.vector = icr_low & APIC_VECTOR_MASK;
@@ -1111,7 +1088,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
ASSERT(apic != NULL);
/* if initial count is 0, current count should also be 0 */
- if (kvm_apic_get_reg(apic, APIC_TMICT) == 0 ||
+ if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
apic->lapic_timer.period == 0)
return 0;
@@ -1168,13 +1145,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
break;
case APIC_PROCPRI:
apic_update_ppr(apic);
- val = kvm_apic_get_reg(apic, offset);
+ val = kvm_lapic_get_reg(apic, offset);
break;
case APIC_TASKPRI:
report_tpr_access(apic, false);
/* fall thru */
default:
- val = kvm_apic_get_reg(apic, offset);
+ val = kvm_lapic_get_reg(apic, offset);
break;
}
@@ -1186,7 +1163,7 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
return container_of(dev, struct kvm_lapic, dev);
}
-static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data)
{
unsigned char alignment = offset & 0xf;
@@ -1223,6 +1200,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
}
return 0;
}
+EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
@@ -1240,7 +1218,7 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
- apic_reg_read(apic, offset, len, data);
+ kvm_lapic_reg_read(apic, offset, len, data);
return 0;
}
@@ -1249,7 +1227,7 @@ static void update_divide_count(struct kvm_lapic *apic)
{
u32 tmp1, tmp2, tdcr;
- tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
+ tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
tmp1 = tdcr & 0xf;
tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
apic->divide_count = 0x1 << (tmp2 & 0x7);
@@ -1260,7 +1238,7 @@ static void update_divide_count(struct kvm_lapic *apic)
static void apic_update_lvtt(struct kvm_lapic *apic)
{
- u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) &
+ u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
apic->lapic_timer.timer_mode_mask;
if (apic->lapic_timer.timer_mode != timer_mode) {
@@ -1296,7 +1274,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- u32 reg = kvm_apic_get_reg(apic, APIC_LVTT);
+ u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
if (kvm_apic_hw_enabled(apic)) {
int vec = reg & APIC_VECTOR_MASK;
@@ -1344,7 +1322,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
/* lapic timer in oneshot or periodic mode */
now = apic->lapic_timer.timer.base->get_time();
- apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
+ apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
* APIC_BUS_CYCLE_NS * apic->divide_count;
if (!apic->lapic_timer.period)
@@ -1376,7 +1354,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
"timer initial count 0x%x, period %lldns, "
"expire @ 0x%016" PRIx64 ".\n", __func__,
APIC_BUS_CYCLE_NS, ktime_to_ns(now),
- kvm_apic_get_reg(apic, APIC_TMICT),
+ kvm_lapic_get_reg(apic, APIC_TMICT),
apic->lapic_timer.period,
ktime_to_ns(ktime_add_ns(now,
apic->lapic_timer.period)));
@@ -1425,7 +1403,7 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
}
}
-static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
+int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
int ret = 0;
@@ -1457,7 +1435,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_DFR:
if (!apic_x2apic_mode(apic)) {
- apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+ kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
recalculate_apic_map(apic->vcpu->kvm);
} else
ret = 1;
@@ -1465,17 +1443,17 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_SPIV: {
u32 mask = 0x3ff;
- if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
+ if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
mask |= APIC_SPIV_DIRECTED_EOI;
apic_set_spiv(apic, val & mask);
if (!(val & APIC_SPIV_APIC_ENABLED)) {
int i;
u32 lvt_val;
- for (i = 0; i < APIC_LVT_NUM; i++) {
- lvt_val = kvm_apic_get_reg(apic,
+ for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
+ lvt_val = kvm_lapic_get_reg(apic,
APIC_LVTT + 0x10 * i);
- apic_set_reg(apic, APIC_LVTT + 0x10 * i,
+ kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
lvt_val | APIC_LVT_MASKED);
}
apic_update_lvtt(apic);
@@ -1486,14 +1464,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
}
case APIC_ICR:
/* No delay here, so we always clear the pending bit */
- apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
+ kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
apic_send_ipi(apic);
break;
case APIC_ICR2:
if (!apic_x2apic_mode(apic))
val &= 0xff000000;
- apic_set_reg(apic, APIC_ICR2, val);
+ kvm_lapic_set_reg(apic, APIC_ICR2, val);
break;
case APIC_LVT0:
@@ -1507,7 +1485,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
val |= APIC_LVT_MASKED;
val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
- apic_set_reg(apic, reg, val);
+ kvm_lapic_set_reg(apic, reg, val);
break;
@@ -1515,7 +1493,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
if (!kvm_apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
- apic_set_reg(apic, APIC_LVTT, val);
+ kvm_lapic_set_reg(apic, APIC_LVTT, val);
apic_update_lvtt(apic);
break;
@@ -1524,14 +1502,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
hrtimer_cancel(&apic->lapic_timer.timer);
- apic_set_reg(apic, APIC_TMICT, val);
+ kvm_lapic_set_reg(apic, APIC_TMICT, val);
start_apic_timer(apic);
break;
case APIC_TDCR:
if (val & 4)
apic_debug("KVM_WRITE:TDCR %x\n", val);
- apic_set_reg(apic, APIC_TDCR, val);
+ kvm_lapic_set_reg(apic, APIC_TDCR, val);
update_divide_count(apic);
break;
@@ -1544,7 +1522,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_SELF_IPI:
if (apic_x2apic_mode(apic)) {
- apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
+ kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
} else
ret = 1;
break;
@@ -1556,6 +1534,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
apic_debug("Local APIC Write to read-only register %x\n", reg);
return ret;
}
+EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
gpa_t address, int len, const void *data)
@@ -1585,14 +1564,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
apic_debug("%s: offset 0x%x with length 0x%x, and value is "
"0x%x\n", __func__, offset, len, val);
- apic_reg_write(apic, offset & 0xff0, val);
+ kvm_lapic_reg_write(apic, offset & 0xff0, val);
return 0;
}
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
- apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
+ kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
@@ -1604,10 +1583,10 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
/* hw has done the conditional check and inst decode */
offset &= 0xff0;
- apic_reg_read(vcpu->arch.apic, offset, 4, &val);
+ kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
/* TODO: optimize to just emulate side effect w/o one more write */
- apic_reg_write(vcpu->arch.apic, offset, val);
+ kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
@@ -1667,14 +1646,14 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
struct kvm_lapic *apic = vcpu->arch.apic;
apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
- | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
+ | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
}
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
u64 tpr;
- tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
+ tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
return (tpr & 0xf0) >> 4;
}
@@ -1740,28 +1719,28 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_apic_set_id(apic, vcpu->vcpu_id);
kvm_apic_set_version(apic->vcpu);
- for (i = 0; i < APIC_LVT_NUM; i++)
- apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+ for (i = 0; i < KVM_APIC_LVT_NUM; i++)
+ kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
apic_update_lvtt(apic);
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
- apic_set_reg(apic, APIC_LVT0,
+ kvm_lapic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
- apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0));
+ apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
- apic_set_reg(apic, APIC_DFR, 0xffffffffU);
+ kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
apic_set_spiv(apic, 0xff);
- apic_set_reg(apic, APIC_TASKPRI, 0);
+ kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
if (!apic_x2apic_mode(apic))
kvm_apic_set_ldr(apic, 0);
- apic_set_reg(apic, APIC_ESR, 0);
- apic_set_reg(apic, APIC_ICR, 0);
- apic_set_reg(apic, APIC_ICR2, 0);
- apic_set_reg(apic, APIC_TDCR, 0);
- apic_set_reg(apic, APIC_TMICT, 0);
+ kvm_lapic_set_reg(apic, APIC_ESR, 0);
+ kvm_lapic_set_reg(apic, APIC_ICR, 0);
+ kvm_lapic_set_reg(apic, APIC_ICR2, 0);
+ kvm_lapic_set_reg(apic, APIC_TDCR, 0);
+ kvm_lapic_set_reg(apic, APIC_TMICT, 0);
for (i = 0; i < 8; i++) {
- apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
- apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
- apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
+ kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
+ kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
+ kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
apic->irr_pending = vcpu->arch.apicv_active;
apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
@@ -1806,7 +1785,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
- u32 reg = kvm_apic_get_reg(apic, lvt_type);
+ u32 reg = kvm_lapic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
@@ -1901,14 +1880,14 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
apic_update_ppr(apic);
highest_irr = apic_find_highest_irr(apic);
if ((highest_irr == -1) ||
- ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
+ ((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI)))
return -1;
return highest_irr;
}
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
- u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
+ u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
int r = 0;
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
@@ -1974,7 +1953,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
apic_update_ppr(apic);
hrtimer_cancel(&apic->lapic_timer.timer);
apic_update_lvtt(apic);
- apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0));
+ apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
update_divide_count(apic);
start_apic_timer(apic);
apic->irr_pending = true;
@@ -1982,9 +1961,11 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1 : count_vectors(apic->regs + APIC_ISR);
apic->highest_isr_cache = -1;
if (vcpu->arch.apicv_active) {
+ if (kvm_x86_ops->apicv_post_state_restore)
+ kvm_x86_ops->apicv_post_state_restore(vcpu);
kvm_x86_ops->hwapic_irr_update(vcpu,
apic_find_highest_irr(apic));
- kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
+ kvm_x86_ops->hwapic_isr_update(vcpu,
apic_find_highest_isr(apic));
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -2097,7 +2078,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
- tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
+ tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
max_irr = apic_find_highest_irr(apic);
if (max_irr < 0)
max_irr = 0;
@@ -2139,8 +2120,8 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
/* if this is ICR write vector before command */
if (reg == APIC_ICR)
- apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
- return apic_reg_write(apic, reg, (u32)data);
+ kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+ return kvm_lapic_reg_write(apic, reg, (u32)data);
}
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
@@ -2157,10 +2138,10 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
return 1;
}
- if (apic_reg_read(apic, reg, 4, &low))
+ if (kvm_lapic_reg_read(apic, reg, 4, &low))
return 1;
if (reg == APIC_ICR)
- apic_reg_read(apic, APIC_ICR2, 4, &high);
+ kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
*data = (((u64)high) << 32) | low;
@@ -2176,8 +2157,8 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
/* if this is ICR write vector before command */
if (reg == APIC_ICR)
- apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
- return apic_reg_write(apic, reg, (u32)data);
+ kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+ return kvm_lapic_reg_write(apic, reg, (u32)data);
}
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
@@ -2188,10 +2169,10 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
if (!lapic_in_kernel(vcpu))
return 1;
- if (apic_reg_read(apic, reg, 4, &low))
+ if (kvm_lapic_reg_read(apic, reg, 4, &low))
return 1;
if (reg == APIC_ICR)
- apic_reg_read(apic, APIC_ICR2, 4, &high);
+ kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
*data = (((u64)high) << 32) | low;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f71183e502ee..891c6da7d4aa 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -7,6 +7,10 @@
#define KVM_APIC_INIT 0
#define KVM_APIC_SIPI 1
+#define KVM_APIC_LVT_NUM 6
+
+#define KVM_APIC_SHORT_MASK 0xc0000
+#define KVM_APIC_DEST_MASK 0x800
struct kvm_timer {
struct hrtimer timer;
@@ -59,6 +63,11 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
+int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val);
+int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+ void *data);
+bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+ int short_hand, unsigned int dest, int dest_mode);
void __kvm_apic_update_irr(u32 *pir, void *regs);
void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
@@ -99,9 +108,32 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
void kvm_lapic_init(void);
-static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off)
+#define VEC_POS(v) ((v) & (32 - 1))
+#define REG_POS(v) (((v) >> 5) << 4)
+
+static inline void kvm_lapic_set_vector(int vec, void *bitmap)
+{
+ set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic)
+{
+ kvm_lapic_set_vector(vec, apic->regs + APIC_IRR);
+ /*
+ * irr_pending must be true if any interrupt is pending; set it after
+ * APIC_IRR to avoid race with apic_clear_irr
+ */
+ apic->irr_pending = true;
+}
+
+static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off)
+{
+ return *((u32 *) (apic->regs + reg_off));
+}
+
+static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
- return *((u32 *) (apic->regs + reg_off));
+ *((u32 *) (apic->regs + reg_off)) = val;
}
extern struct static_key kvm_no_apic_vcpu;
@@ -169,7 +201,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
static inline int kvm_apic_id(struct kvm_lapic *apic)
{
- return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+ return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 38c0c32926c9..def97b3a392b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -336,12 +336,12 @@ static gfn_t pse36_gfn_delta(u32 gpte)
#ifdef CONFIG_X86_64
static void __set_spte(u64 *sptep, u64 spte)
{
- *sptep = spte;
+ WRITE_ONCE(*sptep, spte);
}
static void __update_clear_spte_fast(u64 *sptep, u64 spte)
{
- *sptep = spte;
+ WRITE_ONCE(*sptep, spte);
}
static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
@@ -390,7 +390,7 @@ static void __set_spte(u64 *sptep, u64 spte)
*/
smp_wmb();
- ssptep->spte_low = sspte.spte_low;
+ WRITE_ONCE(ssptep->spte_low, sspte.spte_low);
}
static void __update_clear_spte_fast(u64 *sptep, u64 spte)
@@ -400,7 +400,7 @@ static void __update_clear_spte_fast(u64 *sptep, u64 spte)
ssptep = (union split_spte *)sptep;
sspte = (union split_spte)spte;
- ssptep->spte_low = sspte.spte_low;
+ WRITE_ONCE(ssptep->spte_low, sspte.spte_low);
/*
* If we map the spte from present to nonpresent, we should clear
@@ -1909,18 +1909,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
* since it has been deleted from active_mmu_pages but still can be found
* at hast list.
*
- * for_each_gfn_indirect_valid_sp has skipped that kind of page and
- * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped
- * all the obsolete pages.
+ * for_each_gfn_valid_sp() has skipped that kind of pages.
*/
-#define for_each_gfn_sp(_kvm, _sp, _gfn) \
+#define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
hlist_for_each_entry(_sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
- if ((_sp)->gfn != (_gfn)) {} else
+ if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \
+ || (_sp)->role.invalid) {} else
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
- for_each_gfn_sp(_kvm, _sp, _gfn) \
- if ((_sp)->role.direct || (_sp)->role.invalid) {} else
+ for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
+ if ((_sp)->role.direct) {} else
/* @sp->gfn should be write-protected at the call site */
static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -1961,6 +1960,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
static void mmu_audit_disable(void) { }
#endif
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
@@ -2105,11 +2109,6 @@ static void clear_sp_write_flooding_count(u64 *spte)
__clear_sp_write_flooding_count(sp);
}
-static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
- return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
-}
-
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
@@ -2136,10 +2135,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
- for_each_gfn_sp(vcpu->kvm, sp, gfn) {
- if (is_obsolete_sp(vcpu->kvm, sp))
- continue;
-
+ for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) {
if (!need_sync && sp->unsync)
need_sync = true;
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 3f8c732117ec..c146f3c262c3 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -44,8 +44,6 @@ static bool msr_mtrr_valid(unsigned msr)
case MSR_MTRRdefType:
case MSR_IA32_CR_PAT:
return true;
- case 0x2f8:
- return true;
}
return false;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fafd720ce10a..1163e8173e5a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -14,6 +14,9 @@
* the COPYING file in the top-level directory.
*
*/
+
+#define pr_fmt(fmt) "SVM: " fmt
+
#include <linux/kvm_host.h>
#include "irq.h"
@@ -32,6 +35,7 @@
#include <linux/trace_events.h>
#include <linux/slab.h>
+#include <asm/apic.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
@@ -68,6 +72,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
+#define SVM_AVIC_DOORBELL 0xc001011b
+
#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
@@ -78,6 +84,18 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#define TSC_RATIO_MIN 0x0000000000000001ULL
#define TSC_RATIO_MAX 0x000000ffffffffffULL
+#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+
+/*
+ * 0xff is broadcast, so the max index allowed for physical APIC ID
+ * table is 0xfe. APIC IDs above 0xff are reserved.
+ */
+#define AVIC_MAX_PHYSICAL_ID_COUNT 255
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+
static bool erratum_383_found __read_mostly;
static const u32 host_save_user_msrs[] = {
@@ -162,8 +180,21 @@ struct vcpu_svm {
/* cached guest cpuid flags for faster access */
bool nrips_enabled : 1;
+
+ u32 ldr_reg;
+ struct page *avic_backing_page;
+ u64 *avic_physical_id_cache;
+ bool avic_is_running;
};
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+
static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT 0x0100000000ULL
@@ -205,6 +236,10 @@ module_param(npt, int, S_IRUGO);
static int nested = true;
module_param(nested, int, S_IRUGO);
+/* enable / disable AVIC */
+static int avic;
+module_param(avic, int, S_IRUGO);
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -228,12 +263,18 @@ enum {
VMCB_SEG, /* CS, DS, SS, ES, CPL */
VMCB_CR2, /* CR2 only */
VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
+ VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
+ * AVIC PHYSICAL_TABLE pointer,
+ * AVIC LOGICAL_TABLE pointer
+ */
VMCB_DIRTY_MAX,
};
/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
static inline void mark_all_dirty(struct vmcb *vmcb)
{
vmcb->control.clean = 0;
@@ -255,6 +296,23 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu);
}
+static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
+{
+ svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
+ mark_dirty(svm->vmcb, VMCB_AVIC);
+}
+
+static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 *entry = svm->avic_physical_id_cache;
+
+ if (!entry)
+ return false;
+
+ return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+}
+
static void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h;
@@ -923,6 +981,12 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
+ if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)))
+ avic = false;
+
+ if (avic)
+ pr_info("AVIC enabled\n");
+
return 0;
err:
@@ -1000,6 +1064,22 @@ static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
+static void avic_init_vmcb(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
+ phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
+ phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
+ phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
+
+ vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
+ vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
+ vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
+ vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
+ vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
+ svm->vcpu.arch.apicv_active = true;
+}
+
static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1014,7 +1094,8 @@ static void init_vmcb(struct vcpu_svm *svm)
set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
- set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+ if (!kvm_vcpu_apicv_active(&svm->vcpu))
+ set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
set_dr_intercepts(svm);
@@ -1110,9 +1191,197 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_PAUSE);
}
+ if (avic)
+ avic_init_vmcb(svm);
+
mark_all_dirty(svm->vmcb);
enable_gif(svm);
+
+}
+
+static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
+{
+ u64 *avic_physical_id_table;
+ struct kvm_arch *vm_data = &vcpu->kvm->arch;
+
+ if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
+ return NULL;
+
+ avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page);
+
+ return &avic_physical_id_table[index];
+}
+
+/**
+ * Note:
+ * AVIC hardware walks the nested page table to check permissions,
+ * but does not use the SPA address specified in the leaf page
+ * table entry since it uses address in the AVIC_BACKING_PAGE pointer
+ * field of the VMCB. Therefore, we set up the
+ * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
+ */
+static int avic_init_access_page(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ int ret;
+
+ if (kvm->arch.apic_access_page_done)
+ return 0;
+
+ ret = x86_set_memory_region(kvm,
+ APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE,
+ PAGE_SIZE);
+ if (ret)
+ return ret;
+
+ kvm->arch.apic_access_page_done = true;
+ return 0;
+}
+
+static int avic_init_backing_page(struct kvm_vcpu *vcpu)
+{
+ int ret;
+ u64 *entry, new_entry;
+ int id = vcpu->vcpu_id;
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ ret = avic_init_access_page(vcpu);
+ if (ret)
+ return ret;
+
+ if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
+ return -EINVAL;
+
+ if (!svm->vcpu.arch.apic->regs)
+ return -EINVAL;
+
+ svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
+
+ /* Setting AVIC backing page address in the phy APIC ID table */
+ entry = avic_get_physical_id_entry(vcpu, id);
+ if (!entry)
+ return -EINVAL;
+
+ new_entry = READ_ONCE(*entry);
+ new_entry = (page_to_phys(svm->avic_backing_page) &
+ AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
+ AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
+ WRITE_ONCE(*entry, new_entry);
+
+ svm->avic_physical_id_cache = entry;
+
+ return 0;
+}
+
+static void avic_vm_destroy(struct kvm *kvm)
+{
+ struct kvm_arch *vm_data = &kvm->arch;
+
+ if (vm_data->avic_logical_id_table_page)
+ __free_page(vm_data->avic_logical_id_table_page);
+ if (vm_data->avic_physical_id_table_page)
+ __free_page(vm_data->avic_physical_id_table_page);
+}
+
+static int avic_vm_init(struct kvm *kvm)
+{
+ int err = -ENOMEM;
+ struct kvm_arch *vm_data = &kvm->arch;
+ struct page *p_page;
+ struct page *l_page;
+
+ if (!avic)
+ return 0;
+
+ /* Allocating physical APIC ID table (4KB) */
+ p_page = alloc_page(GFP_KERNEL);
+ if (!p_page)
+ goto free_avic;
+
+ vm_data->avic_physical_id_table_page = p_page;
+ clear_page(page_address(p_page));
+
+ /* Allocating logical APIC ID table (4KB) */
+ l_page = alloc_page(GFP_KERNEL);
+ if (!l_page)
+ goto free_avic;
+
+ vm_data->avic_logical_id_table_page = l_page;
+ clear_page(page_address(l_page));
+
+ return 0;
+
+free_avic:
+ avic_vm_destroy(kvm);
+ return err;
+}
+
+/**
+ * This function is called during VCPU halt/unhalt.
+ */
+static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+{
+ u64 entry;
+ int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu);
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ svm->avic_is_running = is_run;
+
+ /* ID = 0xff (broadcast), ID > 0xff (reserved) */
+ if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
+ return;
+
+ entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
+
+ entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ if (is_run)
+ entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+}
+
+static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ u64 entry;
+ /* ID = 0xff (broadcast), ID > 0xff (reserved) */
+ int h_physical_id = __default_cpu_present_to_apicid(cpu);
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
+ return;
+
+ entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
+
+ entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+ entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
+
+ entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ if (svm->avic_is_running)
+ entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+
+ WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+}
+
+static void avic_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ u64 entry;
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -1131,6 +1400,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
+
+ if (kvm_vcpu_apicv_active(vcpu) && !init_event)
+ avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
}
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@@ -1169,6 +1441,17 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (!hsave_page)
goto free_page3;
+ if (avic) {
+ err = avic_init_backing_page(&svm->vcpu);
+ if (err)
+ goto free_page4;
+ }
+
+ /* We initialize this flag to true to make sure that the is_running
+ * bit would be set the first time the vcpu is loaded.
+ */
+ svm->avic_is_running = true;
+
svm->nested.hsave = page_address(hsave_page);
svm->msrpm = page_address(msrpm_pages);
@@ -1187,6 +1470,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
return &svm->vcpu;
+free_page4:
+ __free_page(hsave_page);
free_page3:
__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
free_page2:
@@ -1243,6 +1528,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* This assumes that the kernel never uses MSR_TSC_AUX */
if (static_cpu_has(X86_FEATURE_RDTSCP))
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+
+ avic_vcpu_load(vcpu, cpu);
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1250,6 +1537,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
int i;
+ avic_vcpu_put(vcpu);
+
++vcpu->stat.host_state_reload;
kvm_load_ldt(svm->host.ldt);
#ifdef CONFIG_X86_64
@@ -1265,6 +1554,16 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
}
+static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+ avic_set_running(vcpu, false);
+}
+
+static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+ avic_set_running(vcpu, true);
+}
+
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->vmcb->save.rflags;
@@ -2673,10 +2972,11 @@ static int clgi_interception(struct vcpu_svm *svm)
disable_gif(svm);
/* After a CLGI no interrupts should come */
- svm_clear_vintr(svm);
- svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
-
- mark_dirty(svm->vmcb, VMCB_INTR);
+ if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
+ svm_clear_vintr(svm);
+ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+ mark_dirty(svm->vmcb, VMCB_INTR);
+ }
return 1;
}
@@ -3212,6 +3512,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_IA32_APICBASE:
+ if (kvm_vcpu_apicv_active(vcpu))
+ avic_update_vapic_bar(to_svm(vcpu), data);
+ /* Follow through */
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -3281,6 +3585,278 @@ static int mwait_interception(struct vcpu_svm *svm)
return nop_interception(svm);
}
+enum avic_ipi_failure_cause {
+ AVIC_IPI_FAILURE_INVALID_INT_TYPE,
+ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+ AVIC_IPI_FAILURE_INVALID_TARGET,
+ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+};
+
+static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
+{
+ u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
+ u32 icrl = svm->vmcb->control.exit_info_1;
+ u32 id = svm->vmcb->control.exit_info_2 >> 32;
+ u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
+ struct kvm_lapic *apic = svm->vcpu.arch.apic;
+
+ trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
+
+ switch (id) {
+ case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
+ /*
+ * AVIC hardware handles the generation of
+ * IPIs when the specified Message Type is Fixed
+ * (also known as fixed delivery mode) and
+ * the Trigger Mode is edge-triggered. The hardware
+ * also supports self and broadcast delivery modes
+ * specified via the Destination Shorthand(DSH)
+ * field of the ICRL. Logical and physical APIC ID
+ * formats are supported. All other IPI types cause
+ * a #VMEXIT, which needs to emulated.
+ */
+ kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
+ kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+ break;
+ case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+ int i;
+ struct kvm_vcpu *vcpu;
+ struct kvm *kvm = svm->vcpu.kvm;
+ struct kvm_lapic *apic = svm->vcpu.arch.apic;
+
+ /*
+ * At this point, we expect that the AVIC HW has already
+ * set the appropriate IRR bits on the valid target
+ * vcpus. So, we just need to kick the appropriate vcpu.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ bool m = kvm_apic_match_dest(vcpu, apic,
+ icrl & KVM_APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & KVM_APIC_DEST_MASK);
+
+ if (m && !avic_vcpu_is_running(vcpu))
+ kvm_vcpu_wake_up(vcpu);
+ }
+ break;
+ }
+ case AVIC_IPI_FAILURE_INVALID_TARGET:
+ break;
+ case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
+ WARN_ONCE(1, "Invalid backing page\n");
+ break;
+ default:
+ pr_err("Unknown IPI interception\n");
+ }
+
+ return 1;
+}
+
+static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
+{
+ struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ int index;
+ u32 *logical_apic_id_table;
+ int dlid = GET_APIC_LOGICAL_ID(ldr);
+
+ if (!dlid)
+ return NULL;
+
+ if (flat) { /* flat */
+ index = ffs(dlid) - 1;
+ if (index > 7)
+ return NULL;
+ } else { /* cluster */
+ int cluster = (dlid & 0xf0) >> 4;
+ int apic = ffs(dlid & 0x0f) - 1;
+
+ if ((apic < 0) || (apic > 7) ||
+ (cluster >= 0xf))
+ return NULL;
+ index = (cluster << 2) + apic;
+ }
+
+ logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
+
+ return &logical_apic_id_table[index];
+}
+
+static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
+ bool valid)
+{
+ bool flat;
+ u32 *entry, new_entry;
+
+ flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
+ entry = avic_get_logical_id_entry(vcpu, ldr, flat);
+ if (!entry)
+ return -EINVAL;
+
+ new_entry = READ_ONCE(*entry);
+ new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+ new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
+ if (valid)
+ new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+ else
+ new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+ WRITE_ONCE(*entry, new_entry);
+
+ return 0;
+}
+
+static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
+{
+ int ret;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
+
+ if (!ldr)
+ return 1;
+
+ ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
+ if (ret && svm->ldr_reg) {
+ avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
+ svm->ldr_reg = 0;
+ } else {
+ svm->ldr_reg = ldr;
+ }
+ return ret;
+}
+
+static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
+{
+ u64 *old, *new;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
+ u32 id = (apic_id_reg >> 24) & 0xff;
+
+ if (vcpu->vcpu_id == id)
+ return 0;
+
+ old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
+ new = avic_get_physical_id_entry(vcpu, id);
+ if (!new || !old)
+ return 1;
+
+ /* We need to move physical_id_entry to new offset */
+ *new = *old;
+ *old = 0ULL;
+ to_svm(vcpu)->avic_physical_id_cache = new;
+
+ /*
+ * Also update the guest physical APIC ID in the logical
+ * APIC ID table entry if already setup the LDR.
+ */
+ if (svm->ldr_reg)
+ avic_handle_ldr_update(vcpu);
+
+ return 0;
+}
+
+static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
+ u32 mod = (dfr >> 28) & 0xf;
+
+ /*
+ * We assume that all local APICs are using the same type.
+ * If this changes, we need to flush the AVIC logical
+ * APID id table.
+ */
+ if (vm_data->ldr_mode == mod)
+ return 0;
+
+ clear_page(page_address(vm_data->avic_logical_id_table_page));
+ vm_data->ldr_mode = mod;
+
+ if (svm->ldr_reg)
+ avic_handle_ldr_update(vcpu);
+ return 0;
+}
+
+static int avic_unaccel_trap_write(struct vcpu_svm *svm)
+{
+ struct kvm_lapic *apic = svm->vcpu.arch.apic;
+ u32 offset = svm->vmcb->control.exit_info_1 &
+ AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+
+ switch (offset) {
+ case APIC_ID:
+ if (avic_handle_apic_id_update(&svm->vcpu))
+ return 0;
+ break;
+ case APIC_LDR:
+ if (avic_handle_ldr_update(&svm->vcpu))
+ return 0;
+ break;
+ case APIC_DFR:
+ avic_handle_dfr_update(&svm->vcpu);
+ break;
+ default:
+ break;
+ }
+
+ kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
+
+ return 1;
+}
+
+static bool is_avic_unaccelerated_access_trap(u32 offset)
+{
+ bool ret = false;
+
+ switch (offset) {
+ case APIC_ID:
+ case APIC_EOI:
+ case APIC_RRR:
+ case APIC_LDR:
+ case APIC_DFR:
+ case APIC_SPIV:
+ case APIC_ESR:
+ case APIC_ICR:
+ case APIC_LVTT:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ case APIC_TMICT:
+ case APIC_TDCR:
+ ret = true;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
+{
+ int ret = 0;
+ u32 offset = svm->vmcb->control.exit_info_1 &
+ AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+ u32 vector = svm->vmcb->control.exit_info_2 &
+ AVIC_UNACCEL_ACCESS_VECTOR_MASK;
+ bool write = (svm->vmcb->control.exit_info_1 >> 32) &
+ AVIC_UNACCEL_ACCESS_WRITE_MASK;
+ bool trap = is_avic_unaccelerated_access_trap(offset);
+
+ trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
+ trap, write, vector);
+ if (trap) {
+ /* Handling Trap */
+ WARN_ONCE(!write, "svm: Handling trap read.\n");
+ ret = avic_unaccel_trap_write(svm);
+ } else {
+ /* Handling Fault */
+ ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+ }
+
+ return ret;
+}
+
static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR0] = cr_interception,
[SVM_EXIT_READ_CR3] = cr_interception,
@@ -3344,6 +3920,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = pf_interception,
[SVM_EXIT_RSM] = emulate_on_interception,
+ [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
+ [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
};
static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -3375,10 +3953,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
+ pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
+ pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
+ pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
+ pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
pr_err("VMCB State Save Area:\n");
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"es:",
@@ -3562,6 +4144,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
{
struct vmcb_control_area *control;
+ /* The following fields are ignored when AVIC is enabled */
control = &svm->vmcb->control;
control->int_vector = irq;
control->int_ctl &= ~V_INTR_PRIO_MASK;
@@ -3583,11 +4166,17 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
}
+static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
+{
+ return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
+}
+
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
+ if (svm_nested_virtualize_tpr(vcpu) ||
+ kvm_vcpu_apicv_active(vcpu))
return;
clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
@@ -3606,11 +4195,28 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
static bool svm_get_enable_apicv(void)
{
- return false;
+ return avic;
+}
+
+static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
+{
}
+static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
+{
+}
+
+/* Note: Currently only used by Hyper-V. */
static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb = svm->vmcb;
+
+ if (!avic)
+ return;
+
+ vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
+ mark_dirty(vmcb, VMCB_INTR);
}
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
@@ -3623,6 +4229,18 @@ static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
return;
}
+static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+{
+ kvm_lapic_set_irr(vec, vcpu->arch.apic);
+ smp_mb__after_atomic();
+
+ if (avic_vcpu_is_running(vcpu))
+ wrmsrl(SVM_AVIC_DOORBELL,
+ __default_cpu_present_to_apicid(vcpu->cpu));
+ else
+ kvm_vcpu_wake_up(vcpu);
+}
+
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3677,6 +4295,9 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ if (kvm_vcpu_apicv_active(vcpu))
+ return;
+
/*
* In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
* 1, because that's a separate STGI/VMRUN intercept. The next time we
@@ -3728,7 +4349,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
+ if (svm_nested_virtualize_tpr(vcpu))
return;
if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
@@ -3742,7 +4363,8 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
u64 cr8;
- if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
+ if (svm_nested_virtualize_tpr(vcpu) ||
+ kvm_vcpu_apicv_active(vcpu))
return;
cr8 = kvm_get_cr8(vcpu);
@@ -4045,14 +4667,26 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_cpuid_entry2 *entry;
/* Update nrips enabled cache */
svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
+
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ entry = kvm_find_cpuid_entry(vcpu, 1, 0);
+ if (entry)
+ entry->ecx &= ~bit(X86_FEATURE_X2APIC);
}
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
switch (func) {
+ case 0x1:
+ if (avic)
+ entry->ecx &= ~bit(X86_FEATURE_X2APIC);
+ break;
case 0x80000001:
if (nested)
entry->ecx |= (1 << 2); /* Set SVM bit */
@@ -4307,6 +4941,15 @@ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
}
+static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
+{
+ if (avic_handle_apic_id_update(vcpu) != 0)
+ return;
+ if (avic_handle_dfr_update(vcpu) != 0)
+ return;
+ avic_handle_ldr_update(vcpu);
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -4322,9 +4965,14 @@ static struct kvm_x86_ops svm_x86_ops = {
.vcpu_free = svm_free_vcpu,
.vcpu_reset = svm_vcpu_reset,
+ .vm_init = avic_vm_init,
+ .vm_destroy = avic_vm_destroy,
+
.prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put,
+ .vcpu_blocking = svm_vcpu_blocking,
+ .vcpu_unblocking = svm_vcpu_unblocking,
.update_bp_intercept = update_bp_intercept,
.get_msr = svm_get_msr,
@@ -4382,6 +5030,9 @@ static struct kvm_x86_ops svm_x86_ops = {
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = svm_load_eoi_exitmap,
.sync_pir_to_irr = svm_sync_pir_to_irr,
+ .hwapic_irr_update = svm_hwapic_irr_update,
+ .hwapic_isr_update = svm_hwapic_isr_update,
+ .apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
@@ -4415,6 +5066,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.sched_in = svm_sched_in,
.pmu_ops = &amd_pmu_ops,
+ .deliver_posted_interrupt = svm_deliver_avic_intr,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b72743c5668d..8de925031b5c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1291,6 +1291,63 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
__entry->vcpu_id, __entry->timer_index)
);
+/*
+ * Tracepoint for AMD AVIC
+ */
+TRACE_EVENT(kvm_avic_incomplete_ipi,
+ TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index),
+ TP_ARGS(vcpu, icrh, icrl, id, index),
+
+ TP_STRUCT__entry(
+ __field(u32, vcpu)
+ __field(u32, icrh)
+ __field(u32, icrl)
+ __field(u32, id)
+ __field(u32, index)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->icrh = icrh;
+ __entry->icrl = icrl;
+ __entry->id = id;
+ __entry->index = index;
+ ),
+
+ TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n",
+ __entry->vcpu, __entry->icrh, __entry->icrl,
+ __entry->id, __entry->index)
+);
+
+TRACE_EVENT(kvm_avic_unaccelerated_access,
+ TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec),
+ TP_ARGS(vcpu, offset, ft, rw, vec),
+
+ TP_STRUCT__entry(
+ __field(u32, vcpu)
+ __field(u32, offset)
+ __field(bool, ft)
+ __field(bool, rw)
+ __field(u32, vec)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->offset = offset;
+ __entry->ft = ft;
+ __entry->rw = rw;
+ __entry->vec = vec;
+ ),
+
+ TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n",
+ __entry->vcpu,
+ __entry->offset,
+ __print_symbolic(__entry->offset, kvm_trace_symbol_apic),
+ __entry->ft ? "trap" : "fault",
+ __entry->rw ? "write" : "read",
+ __entry->vec)
+);
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cb47fe3da292..fb93010beaa4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2418,7 +2418,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_nested;
- else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
+ else if (cpu_has_secondary_exec_ctrls() &&
+ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
@@ -4787,6 +4789,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ if (cpu_has_secondary_exec_ctrls()) {
+ if (kvm_vcpu_apicv_active(vcpu))
+ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+ else
+ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+ }
+
+ if (cpu_has_vmx_msr_bitmap())
+ vmx_set_msr_bitmap(vcpu);
}
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
@@ -5050,8 +5065,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
- vmx_set_cr0(vcpu, cr0); /* enter rmode */
vmx->vcpu.arch.cr0 = cr0;
+ vmx_set_cr0(vcpu, cr0); /* enter rmode */
vmx_set_cr4(vcpu, 0);
vmx_set_efer(vcpu, 0);
vmx_fpu_activate(vcpu);
@@ -6333,23 +6348,20 @@ static __init int hardware_setup(void)
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
- if (enable_apicv) {
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr);
-
- /* According SDM, in x2apic mode, the whole id reg is used.
- * But in KVM, it only use the highest eight bits. Need to
- * intercept it */
- vmx_enable_intercept_msr_read_x2apic(0x802);
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808);
- /* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b);
- /* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f);
- }
+ for (msr = 0x800; msr <= 0x8ff; msr++)
+ vmx_disable_intercept_msr_read_x2apic(msr);
+
+ /* According SDM, in x2apic mode, the whole id reg is used. But in
+ * KVM, it only use the highest eight bits. Need to intercept it */
+ vmx_enable_intercept_msr_read_x2apic(0x802);
+ /* TMCCT */
+ vmx_enable_intercept_msr_read_x2apic(0x839);
+ /* TPR */
+ vmx_disable_intercept_msr_write_x2apic(0x808);
+ /* EOI */
+ vmx_disable_intercept_msr_write_x2apic(0x80b);
+ /* SELF-IPI */
+ vmx_disable_intercept_msr_write_x2apic(0x83f);
if (enable_ept) {
kvm_mmu_set_mask_ptes(0ull,
@@ -8318,19 +8330,19 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
vmcs_write64(APIC_ACCESS_ADDR, hpa);
}
-static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
+static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
u16 status;
u8 old;
- if (isr == -1)
- isr = 0;
+ if (max_isr == -1)
+ max_isr = 0;
status = vmcs_read16(GUEST_INTR_STATUS);
old = status >> 8;
- if (isr != old) {
+ if (max_isr != old) {
status &= 0xff;
- status |= isr << 8;
+ status |= max_isr << 8;
vmcs_write16(GUEST_INTR_STATUS, status);
}
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 12f33e662382..902d9da12392 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -161,6 +161,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_exits", VCPU_STAT(halt_exits) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+ { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
@@ -2002,22 +2003,8 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
vcpu->arch.pv_time_enabled = false;
}
-static void accumulate_steal_time(struct kvm_vcpu *vcpu)
-{
- u64 delta;
-
- if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
- return;
-
- delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
- vcpu->arch.st.last_steal = current->sched_info.run_delay;
- vcpu->arch.st.accum_steal = delta;
-}
-
static void record_steal_time(struct kvm_vcpu *vcpu)
{
- accumulate_steal_time(vcpu);
-
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -2025,9 +2012,26 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
- vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
- vcpu->arch.st.steal.version += 2;
- vcpu->arch.st.accum_steal = 0;
+ if (vcpu->arch.st.steal.version & 1)
+ vcpu->arch.st.steal.version += 1; /* first time write, random junk */
+
+ vcpu->arch.st.steal.version += 1;
+
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+ smp_wmb();
+
+ vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+ vcpu->arch.st.last_steal;
+ vcpu->arch.st.last_steal = current->sched_info.run_delay;
+
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+ smp_wmb();
+
+ vcpu->arch.st.steal.version += 1;
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
@@ -2310,6 +2314,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
case MSR_AMD64_BU_CFG2:
+ case MSR_IA32_PERF_CTL:
msr_info->data = 0;
break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2968,6 +2973,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
| KVM_VCPUEVENT_VALID_SMM))
return -EINVAL;
+ if (events->exception.injected &&
+ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
+ return -EINVAL;
+
process_nmi(vcpu);
vcpu->arch.exception.pending = events->exception.injected;
vcpu->arch.exception.nr = events->exception.nr;
@@ -3032,6 +3041,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
if (dbgregs->flags)
return -EINVAL;
+ if (dbgregs->dr6 & ~0xffffffffull)
+ return -EINVAL;
+ if (dbgregs->dr7 & ~0xffffffffull)
+ return -EINVAL;
+
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = dbgregs->dr6;
@@ -7752,6 +7766,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
+ if (kvm_x86_ops->vm_init)
+ return kvm_x86_ops->vm_init(kvm);
+
return 0;
}
@@ -7808,7 +7825,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
slot = id_to_memslot(slots, id);
if (size) {
- if (WARN_ON(slot->npages))
+ if (slot->npages)
return -EEXIST;
/*
@@ -7873,6 +7890,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
}
+ if (kvm_x86_ops->vm_destroy)
+ kvm_x86_ops->vm_destroy(kvm);
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
@@ -8355,19 +8374,21 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
+bool kvm_arch_has_irq_bypass(void)
+{
+ return kvm_x86_ops->update_pi_irte != NULL;
+}
+
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
struct irq_bypass_producer *prod)
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
- if (kvm_x86_ops->update_pi_irte) {
- irqfd->producer = prod;
- return kvm_x86_ops->update_pi_irte(irqfd->kvm,
- prod->irq, irqfd->gsi, 1);
- }
+ irqfd->producer = prod;
- return -EINVAL;
+ return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+ prod->irq, irqfd->gsi, 1);
}
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@@ -8377,11 +8398,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
- if (!kvm_x86_ops->update_pi_irte) {
- WARN_ON(irqfd->producer != NULL);
- return;
- }
-
WARN_ON(irqfd->producer != prod);
irqfd->producer = NULL;
@@ -8429,3 +8445,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5ce1ed02f7e8..7d1fa7cd2374 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -292,7 +292,7 @@ void vmalloc_sync_all(void)
return;
for (address = VMALLOC_START & PMD_MASK;
- address >= TASK_SIZE && address < FIXADDR_TOP;
+ address >= TASK_SIZE_MAX && address < FIXADDR_TOP;
address += PMD_SIZE) {
struct page *page;
@@ -854,8 +854,13 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
return;
}
#endif
- /* Kernel addresses are always protection faults: */
- if (address >= TASK_SIZE)
+
+ /*
+ * To avoid leaking information about the kernel page table
+ * layout, pretend that user-mode accesses to kernel addresses
+ * are always protection faults.
+ */
+ if (address >= TASK_SIZE_MAX)
error_code |= PF_PROT;
if (likely(show_unhandled_signals))
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 14a95054d4e0..2ae8584b44c7 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -165,6 +165,7 @@ static __init int setup_hugepagesz(char *opt)
} else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
} else {
+ hugetlb_bad_size();
printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
ps >> 20);
return 0;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index f70c1ff46125..9c086c57105c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -617,9 +617,7 @@ static void __init numa_init_array(void)
if (early_cpu_to_node(i) != NUMA_NO_NODE)
continue;
numa_set_node(i, rr);
- rr = next_node(rr, node_online_map);
- if (rr == MAX_NUMNODES)
- rr = first_node(node_online_map);
+ rr = next_node_in(rr, node_online_map);
}
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4286f3618bd0..fe04a04dab8e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -110,11 +110,16 @@ static void bpf_flush_icache(void *start, void *end)
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
/* pick a register outside of BPF range for JIT internal work */
-#define AUX_REG (MAX_BPF_REG + 1)
+#define AUX_REG (MAX_BPF_JIT_REG + 1)
-/* the following table maps BPF registers to x64 registers.
- * x64 register r12 is unused, since if used as base address register
- * in load/store instructions, it always needs an extra byte of encoding
+/* The following table maps BPF registers to x64 registers.
+ *
+ * x64 register r12 is unused, since if used as base address
+ * register in load/store instructions, it always needs an
+ * extra byte of encoding and is callee saved.
+ *
+ * r9 caches skb->len - skb->data_len
+ * r10 caches skb->data, and used for blinding (if enabled)
*/
static const int reg2hex[] = {
[BPF_REG_0] = 0, /* rax */
@@ -128,6 +133,7 @@ static const int reg2hex[] = {
[BPF_REG_8] = 6, /* r14 callee saved */
[BPF_REG_9] = 7, /* r15 callee saved */
[BPF_REG_FP] = 5, /* rbp readonly */
+ [BPF_REG_AX] = 2, /* r10 temp register */
[AUX_REG] = 3, /* r11 temp register */
};
@@ -141,7 +147,8 @@ static bool is_ereg(u32 reg)
BIT(AUX_REG) |
BIT(BPF_REG_7) |
BIT(BPF_REG_8) |
- BIT(BPF_REG_9));
+ BIT(BPF_REG_9) |
+ BIT(BPF_REG_AX));
}
/* add modifiers if 'reg' maps to x64 registers r8..r15 */
@@ -182,6 +189,7 @@ static void jit_fill_hole(void *area, unsigned int size)
struct jit_context {
int cleanup_addr; /* epilogue code offset */
bool seen_ld_abs;
+ bool seen_ax_reg;
};
/* maximum number of bytes emitted while JITing one eBPF insn */
@@ -345,6 +353,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
struct bpf_insn *insn = bpf_prog->insnsi;
int insn_cnt = bpf_prog->len;
bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
+ bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
int i, cnt = 0;
@@ -367,6 +376,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int ilen;
u8 *func;
+ if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
+ ctx->seen_ax_reg = seen_ax_reg = true;
+
switch (insn->code) {
/* ALU */
case BPF_ALU | BPF_ADD | BPF_X:
@@ -1002,6 +1014,10 @@ common_load:
* sk_load_* helpers also use %r10 and %r9d.
* See bpf_jit.S
*/
+ if (seen_ax_reg)
+ /* r10 = skb->data, mov %r10, off32(%rbx) */
+ EMIT3_off32(0x4c, 0x8b, 0x93,
+ offsetof(struct sk_buff, data));
EMIT1_off32(0xE8, jmp_offset); /* call */
break;
@@ -1073,25 +1089,37 @@ void bpf_jit_compile(struct bpf_prog *prog)
{
}
-void bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;
+ struct bpf_prog *tmp, *orig_prog = prog;
int proglen, oldproglen = 0;
struct jit_context ctx = {};
+ bool tmp_blinded = false;
u8 *image = NULL;
int *addrs;
int pass;
int i;
if (!bpf_jit_enable)
- return;
+ return orig_prog;
- if (!prog || !prog->len)
- return;
+ tmp = bpf_jit_blind_constants(prog);
+ /* If blinding was requested and we failed during blinding,
+ * we must fall back to the interpreter.
+ */
+ if (IS_ERR(tmp))
+ return orig_prog;
+ if (tmp != prog) {
+ tmp_blinded = true;
+ prog = tmp;
+ }
addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
- if (!addrs)
- return;
+ if (!addrs) {
+ prog = orig_prog;
+ goto out;
+ }
/* Before first pass, make a rough estimation of addrs[]
* each bpf instruction is translated to less than 64 bytes
@@ -1113,21 +1141,25 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
image = NULL;
if (header)
bpf_jit_binary_free(header);
- goto out;
+ prog = orig_prog;
+ goto out_addrs;
}
if (image) {
if (proglen != oldproglen) {
pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
proglen, oldproglen);
- goto out;
+ prog = orig_prog;
+ goto out_addrs;
}
break;
}
if (proglen == oldproglen) {
header = bpf_jit_binary_alloc(proglen, &image,
1, jit_fill_hole);
- if (!header)
- goto out;
+ if (!header) {
+ prog = orig_prog;
+ goto out_addrs;
+ }
}
oldproglen = proglen;
}
@@ -1141,8 +1173,14 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
prog->bpf_func = (void *)image;
prog->jited = 1;
}
-out:
+
+out_addrs:
kfree(addrs);
+out:
+ if (tmp_blinded)
+ bpf_jit_prog_release_other(prog, prog == orig_prog ?
+ tmp : orig_prog);
+ return prog;
}
void bpf_jit_free(struct bpf_prog *fp)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 3cd69832d7f4..b2a4e2a61f6b 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -396,7 +396,6 @@ int __init pci_acpi_init(void)
return -ENODEV;
printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
- acpi_irq_penalty_init();
pcibios_enable_irq = acpi_pci_irq_enable;
pcibios_disable_irq = acpi_pci_irq_disable;
x86_init.pci.init_irq = x86_init_noop;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 381a43c40bf7..8196054fedb0 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -516,7 +516,7 @@ void __init pcibios_set_cache_line_size(void)
int __init pcibios_init(void)
{
- if (!raw_pci_ops) {
+ if (!raw_pci_ops && !raw_pci_ext_ops) {
printk(KERN_WARNING "PCI: System does not support PCI\n");
return 0;
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index b7de1929714b..837ea36a837d 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -552,9 +552,16 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
+/*
+ * Broadwell EP Home Agent BARs erroneously return non-zero values when read.
+ *
+ * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
+ * entry BDF2.
+ */
static void pci_bdwep_bar(struct pci_dev *dev)
{
dev->non_compliant_bars = 1;
}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 4bd08b0fc8ea..99ddab79215e 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -491,8 +491,11 @@ int __init pci_xen_initial_domain(void)
#endif
__acpi_register_gsi = acpi_register_gsi_xen;
__acpi_unregister_gsi = NULL;
- /* Pre-allocate legacy irqs */
- for (irq = 0; irq < nr_legacy_irqs(); irq++) {
+ /*
+ * Pre-allocate the legacy IRQs. Use NR_LEGACY_IRQS here
+ * because we don't have a PIC and thus nr_legacy_irqs() is zero.
+ */
+ for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
int trigger, polarity;
if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 92723aeae0f9..cd95075944ab 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -11,7 +11,6 @@
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/page_types.h>
-#include <asm/frame.h>
#define SAVE_XMM \
mov %rsp, %rax; \
@@ -40,10 +39,10 @@
mov (%rsp), %rsp
ENTRY(efi_call)
- FRAME_BEGIN
+ pushq %rbp
+ movq %rsp, %rbp
SAVE_XMM
- mov (%rsp), %rax
- mov 8(%rax), %rax
+ mov 16(%rbp), %rax
subq $48, %rsp
mov %r9, 32(%rsp)
mov %rax, 40(%rsp)
@@ -53,6 +52,6 @@ ENTRY(efi_call)
call *%rdi
addq $48, %rsp
RESTORE_XMM
- FRAME_END
+ popq %rbp
ret
ENDPROC(efi_call)
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 097cb09d917b..4480c06cade7 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -371,5 +371,5 @@ bool efi_reboot_required(void)
bool efi_poweroff_required(void)
{
- return !!acpi_gbl_reduced_hardware;
+ return acpi_gbl_reduced_hardware || acpi_no_s5;
}
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 92e3e1d84c1d..12734a96df47 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -26,7 +26,5 @@ quiet_cmd_bin2c = BIN2C $@
$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
$(call if_changed,bin2c)
- @:
-
obj-$(CONFIG_KEXEC_FILE) += kexec-purgatory.o
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index b95964610ea7..c556c5ae8de5 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -59,7 +59,6 @@ OBJCOPYFLAGS_realmode.bin := -O binary
targets += realmode.bin
$(obj)/realmode.bin: $(obj)/realmode.elf $(obj)/realmode.relocs FORCE
$(call if_changed,objcopy)
- @:
quiet_cmd_relocs = RELOCS $@
cmd_relocs = arch/x86/tools/relocs --realmode $< > $@
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index 41bfe84e11ab..00f54a91bb4b 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -11,21 +11,56 @@
#endif
#include <longjmp.h>
#include <sysdep/ptrace_user.h>
+#include <sys/uio.h>
+#include <asm/sigcontext.h>
+#include <linux/elf.h>
-int save_fp_registers(int pid, unsigned long *fp_regs)
+int have_xstate_support;
+
+int save_i387_registers(int pid, unsigned long *fp_regs)
{
if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
return -errno;
return 0;
}
-int restore_fp_registers(int pid, unsigned long *fp_regs)
+int save_fp_registers(int pid, unsigned long *fp_regs)
+{
+ struct iovec iov;
+
+ if (have_xstate_support) {
+ iov.iov_base = fp_regs;
+ iov.iov_len = sizeof(struct _xstate);
+ if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
+ return -errno;
+ return 0;
+ } else {
+ return save_i387_registers(pid, fp_regs);
+ }
+}
+
+int restore_i387_registers(int pid, unsigned long *fp_regs)
{
if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
return -errno;
return 0;
}
+int restore_fp_registers(int pid, unsigned long *fp_regs)
+{
+ struct iovec iov;
+
+ if (have_xstate_support) {
+ iov.iov_base = fp_regs;
+ iov.iov_len = sizeof(struct _xstate);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0)
+ return -errno;
+ return 0;
+ } else {
+ return restore_i387_registers(pid, fp_regs);
+ }
+}
+
#ifdef __i386__
int have_fpx_regs = 1;
int save_fpx_registers(int pid, unsigned long *fp_regs)
@@ -85,6 +120,16 @@ int put_fp_registers(int pid, unsigned long *regs)
return restore_fp_registers(pid, regs);
}
+void arch_init_registers(int pid)
+{
+ struct _xstate fp_regs;
+ struct iovec iov;
+
+ iov.iov_base = &fp_regs;
+ iov.iov_len = sizeof(struct _xstate);
+ if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0)
+ have_xstate_support = 1;
+}
#endif
unsigned long get_thread_reg(int reg, jmp_buf *buf)
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 47c78d5e5c32..ebd4dd6ef73b 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -194,7 +194,8 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
struct user_i387_struct fpregs;
- err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
+ err = save_i387_registers(userspace_pid[cpu],
+ (unsigned long *) &fpregs);
if (err)
return err;
@@ -214,7 +215,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
if (n > 0)
return -EFAULT;
- return restore_fp_registers(userspace_pid[cpu],
+ return restore_i387_registers(userspace_pid[cpu],
(unsigned long *) &fpregs);
}
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index a629694ee750..faab418876ce 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -222,14 +222,14 @@ int is_syscall(unsigned long addr)
static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
- long fpregs[HOST_FP_SIZE];
+ struct user_i387_struct fpregs;
- BUG_ON(sizeof(*buf) != sizeof(fpregs));
- err = save_fp_registers(userspace_pid[cpu], fpregs);
+ err = save_i387_registers(userspace_pid[cpu],
+ (unsigned long *) &fpregs);
if (err)
return err;
- n = copy_to_user(buf, fpregs, sizeof(fpregs));
+ n = copy_to_user(buf, &fpregs, sizeof(fpregs));
if (n > 0)
return -EFAULT;
@@ -239,14 +239,14 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
int n, cpu = ((struct thread_info *) child->stack)->cpu;
- long fpregs[HOST_FP_SIZE];
+ struct user_i387_struct fpregs;
- BUG_ON(sizeof(*buf) != sizeof(fpregs));
- n = copy_from_user(fpregs, buf, sizeof(fpregs));
+ n = copy_from_user(&fpregs, buf, sizeof(fpregs));
if (n > 0)
return -EFAULT;
- return restore_fp_registers(userspace_pid[cpu], fpregs);
+ return restore_i387_registers(userspace_pid[cpu],
+ (unsigned long *) &fpregs);
}
long subarch_ptrace(struct task_struct *child, long request,
diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h
index 919789f1071e..0dc223aa1c2d 100644
--- a/arch/x86/um/shared/sysdep/ptrace_64.h
+++ b/arch/x86/um/shared/sysdep/ptrace_64.h
@@ -57,8 +57,6 @@
#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
-static inline void arch_init_registers(int pid)
-{
-}
+extern void arch_init_registers(int pid);
#endif
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 14fcd01ed992..49e503697022 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -225,26 +225,16 @@ static int copy_sc_from_user(struct pt_regs *regs,
} else
#endif
{
- struct user_i387_struct fp;
-
- err = copy_from_user(&fp, (void *)sc.fpstate,
- sizeof(struct user_i387_struct));
+ err = copy_from_user(regs->regs.fp, (void *)sc.fpstate,
+ sizeof(struct _xstate));
if (err)
return 1;
-
- err = restore_fp_registers(pid, (unsigned long *) &fp);
- if (err < 0) {
- printk(KERN_ERR "copy_sc_from_user - "
- "restore_fp_registers failed, errno = %d\n",
- -err);
- return 1;
- }
}
return 0;
}
static int copy_sc_to_user(struct sigcontext __user *to,
- struct _fpstate __user *to_fp, struct pt_regs *regs,
+ struct _xstate __user *to_fp, struct pt_regs *regs,
unsigned long mask)
{
struct sigcontext sc;
@@ -310,25 +300,22 @@ static int copy_sc_to_user(struct sigcontext __user *to,
return 1;
}
- err = convert_fxsr_to_user(to_fp, &fpx);
+ err = convert_fxsr_to_user(&to_fp->fpstate, &fpx);
if (err)
return 1;
- err |= __put_user(fpx.swd, &to_fp->status);
- err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic);
+ err |= __put_user(fpx.swd, &to_fp->fpstate.status);
+ err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic);
if (err)
return 1;
- if (copy_to_user(&to_fp->_fxsr_env[0], &fpx,
+ if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx,
sizeof(struct user_fxsr_struct)))
return 1;
} else
#endif
{
- struct user_i387_struct fp;
-
- err = save_fp_registers(pid, (unsigned long *) &fp);
- if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
+ if (copy_to_user(to_fp, regs->regs.fp, sizeof(struct _xstate)))
return 1;
}
@@ -337,7 +324,7 @@ static int copy_sc_to_user(struct sigcontext __user *to,
#ifdef CONFIG_X86_32
static int copy_ucontext_to_user(struct ucontext __user *uc,
- struct _fpstate __user *fp, sigset_t *set,
+ struct _xstate __user *fp, sigset_t *set,
unsigned long sp)
{
int err = 0;
@@ -353,7 +340,7 @@ struct sigframe
char __user *pretcode;
int sig;
struct sigcontext sc;
- struct _fpstate fpstate;
+ struct _xstate fpstate;
unsigned long extramask[_NSIG_WORDS-1];
char retcode[8];
};
@@ -366,7 +353,7 @@ struct rt_sigframe
void __user *puc;
struct siginfo info;
struct ucontext uc;
- struct _fpstate fpstate;
+ struct _xstate fpstate;
char retcode[8];
};
@@ -495,7 +482,7 @@ struct rt_sigframe
char __user *pretcode;
struct ucontext uc;
struct siginfo info;
- struct _fpstate fpstate;
+ struct _xstate fpstate;
};
int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index 470564bbd08e..cb3c22370cf5 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -50,7 +50,7 @@ void foo(void)
DEFINE(HOST_GS, GS);
DEFINE(HOST_ORIG_AX, ORIG_EAX);
#else
- DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
+ DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long));
DEFINE_LONGS(HOST_BX, RBX);
DEFINE_LONGS(HOST_CX, RCX);
DEFINE_LONGS(HOST_DI, RDI);
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 237c6831e095..6be22f991b59 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -61,7 +61,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (!vdso_enabled)
return 0;
- down_write(&mm->mmap_sem);
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
VM_READ|VM_EXEC|
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 7ab29518a3b9..e345891450c3 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -393,6 +393,9 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
unsigned long i = 0;
unsigned long n = end_pfn - start_pfn;
+ if (remap_pfn == 0)
+ remap_pfn = nr_pages;
+
while (i < n) {
unsigned long cur_pfn = start_pfn + i;
unsigned long left = n - i;
@@ -438,17 +441,29 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
return remap_pfn;
}
-static void __init xen_set_identity_and_remap(unsigned long nr_pages)
+static unsigned long __init xen_count_remap_pages(
+ unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
+ unsigned long remap_pages)
+{
+ if (start_pfn >= nr_pages)
+ return remap_pages;
+
+ return remap_pages + min(end_pfn, nr_pages) - start_pfn;
+}
+
+static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
+ unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn,
+ unsigned long nr_pages, unsigned long last_val))
{
phys_addr_t start = 0;
- unsigned long last_pfn = nr_pages;
+ unsigned long ret_val = 0;
const struct e820entry *entry = xen_e820_map;
int i;
/*
* Combine non-RAM regions and gaps until a RAM region (or the
- * end of the map) is reached, then set the 1:1 map and
- * remap the memory in those non-RAM regions.
+ * end of the map) is reached, then call the provided function
+ * to perform its duty on the non-RAM region.
*
* The combined non-RAM regions are rounded to a whole number
* of pages so any partial pages are accessible via the 1:1
@@ -466,14 +481,13 @@ static void __init xen_set_identity_and_remap(unsigned long nr_pages)
end_pfn = PFN_UP(entry->addr);
if (start_pfn < end_pfn)
- last_pfn = xen_set_identity_and_remap_chunk(
- start_pfn, end_pfn, nr_pages,
- last_pfn);
+ ret_val = func(start_pfn, end_pfn, nr_pages,
+ ret_val);
start = end;
}
}
- pr_info("Released %ld page(s)\n", xen_released_pages);
+ return ret_val;
}
/*
@@ -596,35 +610,6 @@ static void __init xen_ignore_unusable(void)
}
}
-static unsigned long __init xen_count_remap_pages(unsigned long max_pfn)
-{
- unsigned long extra = 0;
- unsigned long start_pfn, end_pfn;
- const struct e820entry *entry = xen_e820_map;
- int i;
-
- end_pfn = 0;
- for (i = 0; i < xen_e820_map_entries; i++, entry++) {
- start_pfn = PFN_DOWN(entry->addr);
- /* Adjacent regions on non-page boundaries handling! */
- end_pfn = min(end_pfn, start_pfn);
-
- if (start_pfn >= max_pfn)
- return extra + max_pfn - end_pfn;
-
- /* Add any holes in map to result. */
- extra += start_pfn - end_pfn;
-
- end_pfn = PFN_UP(entry->addr + entry->size);
- end_pfn = min(end_pfn, max_pfn);
-
- if (entry->type != E820_RAM)
- extra += end_pfn - start_pfn;
- }
-
- return extra;
-}
-
bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
{
struct e820entry *entry;
@@ -804,7 +789,7 @@ char * __init xen_memory_setup(void)
max_pages = xen_get_max_pages();
/* How many extra pages do we need due to remapping? */
- max_pages += xen_count_remap_pages(max_pfn);
+ max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages);
if (max_pages > max_pfn)
extra_pages += max_pages - max_pfn;
@@ -922,7 +907,9 @@ char * __init xen_memory_setup(void)
* Set identity map on non-RAM pages and prepare remapping the
* underlying RAM.
*/
- xen_set_identity_and_remap(max_pfn);
+ xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk);
+
+ pr_info("Released %ld page(s)\n", xen_released_pages);
return "Xen";
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a0a4e554c6f1..6deba5bc7e34 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -290,11 +290,11 @@ static int xen_vcpuop_set_next_event(unsigned long delta,
WARN_ON(!clockevent_state_oneshot(evt));
single.timeout_abs_ns = get_abs_timeout(delta);
- single.flags = VCPU_SSHOTTMR_future;
+ /* Get an event anyway, even if the timeout is already expired */
+ single.flags = 0;
ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
-
- BUG_ON(ret != 0 && ret != -ETIME);
+ BUG_ON(ret != 0);
return ret;
}