diff options
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 10 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/xen/apic.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_hvm.c | 50 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pv.c | 135 | ||||
-rw-r--r-- | arch/x86/xen/mmu_pv.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/pci-swiotlb-xen.c | 96 | ||||
-rw-r--r-- | arch/x86/xen/pmu.c | 113 | ||||
-rw-r--r-- | arch/x86/xen/pmu.h | 3 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 27 | ||||
-rw-r--r-- | arch/x86/xen/smp_hvm.c | 6 | ||||
-rw-r--r-- | arch/x86/xen/smp_pv.c | 35 | ||||
-rw-r--r-- | arch/x86/xen/suspend_hvm.c | 10 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 24 | ||||
-rw-r--r-- | arch/x86/xen/vga.c | 12 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm.S | 52 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 19 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 6 |
19 files changed, 310 insertions, 295 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 6bcd3d8ca6ac..9b1ec5d8c99c 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -23,6 +23,7 @@ config XEN_PV select PARAVIRT_XXL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU + select GUEST_PERF_EVENTS help Support running as a Xen PV guest. @@ -91,3 +92,12 @@ config XEN_DOM0 select X86_X2APIC if XEN_PVH && X86_64 help Support running as a Xen Dom0 guest. + +config XEN_PV_MSR_SAFE + bool "Always use safe MSR accesses in PV guests" + default y + depends on XEN_PV + help + Use safe (not faulting) MSR access functions even if the MSR access + should not fault anyway. + The default can be changed by using the "xen_msr_safe" boot parameter. diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 4953260e281c..3c5b52fbe4a7 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -47,6 +47,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_PV_DOM0) += vga.o -obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o - obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 0d46cc283cf5..62d34b6611c5 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -51,7 +51,7 @@ static u32 xen_apic_read(u32 reg) .interface_version = XENPF_INTERFACE_VERSION, .u.pcpu_info.xen_cpuid = 0, }; - int ret = 0; + int ret; /* Shouldn't need this as APIC is turned off for PV, and we only * get called on the bootup processor. But just in case. */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 30c6e986a6cd..b8db2148c07d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(xen_start_info); struct shared_info xen_dummy_shared_info; -__read_mostly int xen_have_vector_callback; +__read_mostly bool xen_have_vector_callback = true; EXPORT_SYMBOL_GPL(xen_have_vector_callback); /* diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 42300941ec29..c1cd28e915a3 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -4,11 +4,15 @@ #include <linux/cpu.h> #include <linux/kexec.h> #include <linux/memblock.h> +#include <linux/virtio_anchor.h> #include <xen/features.h> #include <xen/events.h> +#include <xen/hvm.h> +#include <xen/interface/hvm/hvm_op.h> #include <xen/interface/memory.h> +#include <asm/apic.h> #include <asm/cpu.h> #include <asm/smp.h> #include <asm/io_apic.h> @@ -29,6 +33,9 @@ static unsigned long shared_info_pfn; +__ro_after_init bool xen_percpu_upcall; +EXPORT_SYMBOL_GPL(xen_percpu_upcall); + void xen_hvm_init_shared_info(void) { struct xen_add_to_physmap xatp; @@ -124,6 +131,9 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback) { struct pt_regs *old_regs = set_irq_regs(regs); + if (xen_percpu_upcall) + ack_APIC_irq(); + inc_irq_stat(irq_hv_callback_count); xen_hvm_evtchn_do_upcall(); @@ -167,6 +177,15 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) if (!xen_have_vector_callback) return 0; + if (xen_percpu_upcall) { + rc = xen_set_upcall_vector(cpu); + if (rc) { + WARN(1, "HVMOP_set_evtchn_upcall_vector" + " for CPU %d failed: %d\n", cpu, rc); + return rc; + } + } + if (xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); @@ -184,17 +203,17 @@ static int xen_cpu_dead_hvm(unsigned int cpu) if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) xen_teardown_timer(cpu); - - return 0; + return 0; } -static bool no_vector_callback __initdata; - static void __init xen_hvm_guest_init(void) { if (xen_pv_domain()) return; + if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) + virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc); + init_hvm_pv_info(); reserve_shared_info(); @@ -209,9 +228,6 @@ static void __init xen_hvm_guest_init(void) xen_panic_handler_init(); - if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector)) - xen_have_vector_callback = 1; - xen_hvm_smp_init(); WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm)); xen_unplug_emulated_devices(); @@ -237,20 +253,19 @@ early_param("xen_nopv", xen_parse_nopv); static __init int xen_parse_no_vector_callback(char *arg) { - no_vector_callback = true; + xen_have_vector_callback = false; return 0; } early_param("xen_no_vector_callback", xen_parse_no_vector_callback); -bool __init xen_hvm_need_lapic(void) +static __init bool xen_x2apic_available(void) { - if (xen_pv_domain()) - return false; - if (!xen_hvm_domain()) - return false; - if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback) - return false; - return true; + return x2apic_supported(); +} + +static bool __init msi_ext_dest_id(void) +{ + return cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_EXT_DEST_ID; } static __init void xen_hvm_guest_late_init(void) @@ -312,9 +327,10 @@ struct hypervisor_x86 x86_hyper_xen_hvm __initdata = { .detect = xen_platform_hvm, .type = X86_HYPER_XEN_HVM, .init.init_platform = xen_hvm_guest_init, - .init.x2apic_available = xen_x2apic_para_available, + .init.x2apic_available = xen_x2apic_available, .init.init_mem_mapping = xen_hvm_init_mem_mapping, .init.guest_late_init = xen_hvm_guest_late_init, + .init.msi_ext_dest_id = msi_ext_dest_id, .runtime.pin_vcpu = xen_pin_vcpu, .ignore_nopv = true, }; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5004feb16783..f82857e48815 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -30,7 +30,8 @@ #include <linux/pci.h> #include <linux/gfp.h> #include <linux/edd.h> -#include <linux/objtool.h> +#include <linux/reboot.h> +#include <linux/virtio_anchor.h> #include <xen/xen.h> #include <xen/events.h> @@ -107,8 +108,22 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); +static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); + +static int __init parse_xen_msr_safe(char *str) +{ + if (str) + return strtobool(str, &xen_msr_safe); + return -EINVAL; +} +early_param("xen_msr_safe", parse_xen_msr_safe); + static void __init xen_pv_init_platform(void) { + /* PV guests can't operate virtio devices without grants. */ + if (IS_ENABLED(CONFIG_XEN_VIRTIO)) + virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc); + populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info); @@ -165,7 +180,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, *bx &= maskebx; } -STACK_FRAME_NON_STANDARD(xen_cpuid); /* XEN_EMULATE_PREFIX */ static bool __init xen_check_mwait(void) { @@ -624,6 +638,9 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_coprocessor_error, false ), TRAP_ENTRY(exc_alignment_check, false ), TRAP_ENTRY(exc_simd_coprocessor_error, false ), +#ifdef CONFIG_X86_KERNEL_IBT + TRAP_ENTRY(exc_control_protection, false ), +#endif }; static bool __ref get_trap_addr(void **addr, unsigned int ist) @@ -758,6 +775,7 @@ static void xen_load_idt(const struct desc_ptr *desc) { static DEFINE_SPINLOCK(lock); static struct trap_info traps[257]; + static const struct trap_info zero = { }; unsigned out; trace_xen_cpu_load_idt(desc); @@ -767,7 +785,7 @@ static void xen_load_idt(const struct desc_ptr *desc) memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); out = xen_convert_trap_info(desc, traps, false); - memset(&traps[out], 0, sizeof(traps[0])); + traps[out] = zero; xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) @@ -909,14 +927,18 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static u64 xen_read_msr_safe(unsigned int msr, int *err) +static u64 xen_do_read_msr(unsigned int msr, int *err) { - u64 val; + u64 val = 0; /* Avoid uninitialized value for safe variant. */ if (pmu_msr_read(msr, &val, err)) return val; - val = native_read_msr_safe(msr, err); + if (err) + val = native_read_msr_safe(msr, err); + else + val = native_read_msr(msr); + switch (msr) { case MSR_IA32_APICBASE: val &= ~X2APIC_ENABLE; @@ -925,23 +947,39 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) return val; } -static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) +static void set_seg(unsigned int which, unsigned int low, unsigned int high, + int *err) { - int ret; - unsigned int which; - u64 base; + u64 base = ((u64)high << 32) | low; - ret = 0; + if (HYPERVISOR_set_segment_base(which, base) == 0) + return; + if (err) + *err = -EIO; + else + WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base); +} + +/* + * Support write_msr_safe() and write_msr() semantics. + * With err == NULL write_msr() semantics are selected. + * Supplying an err pointer requires err to be pre-initialized with 0. + */ +static void xen_do_write_msr(unsigned int msr, unsigned int low, + unsigned int high, int *err) +{ switch (msr) { - case MSR_FS_BASE: which = SEGBASE_FS; goto set; - case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; - case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; - - set: - base = ((u64)high << 32) | low; - if (HYPERVISOR_set_segment_base(which, base) != 0) - ret = -EIO; + case MSR_FS_BASE: + set_seg(SEGBASE_FS, low, high, err); + break; + + case MSR_KERNEL_GS_BASE: + set_seg(SEGBASE_GS_USER, low, high, err); + break; + + case MSR_GS_BASE: + set_seg(SEGBASE_GS_KERNEL, low, high, err); break; case MSR_STAR: @@ -957,31 +995,42 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) break; default: - if (!pmu_msr_write(msr, low, high, &ret)) - ret = native_write_msr_safe(msr, low, high); + if (!pmu_msr_write(msr, low, high, err)) { + if (err) + *err = native_write_msr_safe(msr, low, high); + else + native_write_msr(msr, low, high); + } } +} + +static u64 xen_read_msr_safe(unsigned int msr, int *err) +{ + return xen_do_read_msr(msr, err); +} + +static int xen_write_msr_safe(unsigned int msr, unsigned int low, + unsigned int high) +{ + int err = 0; - return ret; + xen_do_write_msr(msr, low, high, &err); + + return err; } static u64 xen_read_msr(unsigned int msr) { - /* - * This will silently swallow a #GP from RDMSR. It may be worth - * changing that. - */ int err; - return xen_read_msr_safe(msr, &err); + return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL); } static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) { - /* - * This will silently swallow a #GP from WRMSR. It may be worth - * changing that. - */ - xen_write_msr_safe(msr, low, high); + int err; + + xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL); } /* This is called once we have the cpu_possible_mask */ @@ -1068,8 +1117,7 @@ static void xen_machine_halt(void) static void xen_machine_power_off(void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); xen_reboot(SHUTDOWN_poweroff); } @@ -1177,16 +1225,26 @@ static void __init xen_domu_set_legacy_features(void) x86_platform.legacy.rtc = 0; } +extern void early_xen_iret_patch(void); + /* First C function to be called on Xen boot */ -asmlinkage __visible void __init xen_start_kernel(void) +asmlinkage __visible void __init xen_start_kernel(struct start_info *si) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; int rc; - if (!xen_start_info) + if (!si) return; + clear_bss(); + + xen_start_info = si; + + __text_gen_insn(&early_xen_iret_patch, + JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, + JMP32_INSN_SIZE); + xen_domain_type = XEN_PV_DOMAIN; xen_start_flags = xen_start_info->flags; @@ -1195,7 +1253,6 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; pv_ops.cpu = xen_cpu_ops.cpu; - paravirt_iret = xen_iret; xen_init_irq_ops(); /* @@ -1341,10 +1398,6 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_acpi_sleep_register(); - /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - xen_boot_params_init_edd(); #ifdef CONFIG_ACPI diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 00354866921b..ee29fb558f2e 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -80,6 +80,7 @@ #include <xen/interface/version.h> #include <xen/interface/memory.h> #include <xen/hvc-console.h> +#include <xen/swiotlb-xen.h> #include "multicalls.h" #include "mmu.h" diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c deleted file mode 100644 index 46df59aeaa06..000000000000 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* Glue code to lib/swiotlb-xen.c */ - -#include <linux/dma-map-ops.h> -#include <linux/pci.h> -#include <xen/swiotlb-xen.h> - -#include <asm/xen/hypervisor.h> -#include <xen/xen.h> -#include <asm/iommu_table.h> - - -#include <asm/xen/swiotlb-xen.h> -#ifdef CONFIG_X86_64 -#include <asm/iommu.h> -#include <asm/dma.h> -#endif -#include <linux/export.h> - -static int xen_swiotlb __read_mostly; - -/* - * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary - * - * This returns non-zero if we are forced to use xen_swiotlb (by the boot - * option). - */ -int __init pci_xen_swiotlb_detect(void) -{ - - if (!xen_pv_domain()) - return 0; - - /* If running as PV guest, either iommu=soft, or swiotlb=force will - * activate this IOMMU. If running as PV privileged, activate it - * irregardless. - */ - if (xen_initial_domain() || swiotlb || swiotlb_force == SWIOTLB_FORCE) - xen_swiotlb = 1; - - /* If we are running under Xen, we MUST disable the native SWIOTLB. - * Don't worry about swiotlb_force flag activating the native, as - * the 'swiotlb' flag is the only one turning it on. */ - swiotlb = 0; - -#ifdef CONFIG_X86_64 - /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 - * (so no iommu=X command line over-writes). - * Considering that PV guests do not want the *native SWIOTLB* but - * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here. - */ - if (max_pfn > MAX_DMA32_PFN) - no_iommu = 1; -#endif - return xen_swiotlb; -} - -static void __init pci_xen_swiotlb_init(void) -{ - if (xen_swiotlb) { - xen_swiotlb_init_early(); - dma_ops = &xen_swiotlb_dma_ops; - -#ifdef CONFIG_PCI - /* Make sure ACS will be enabled */ - pci_request_acs(); -#endif - } -} - -int pci_xen_swiotlb_init_late(void) -{ - int rc; - - if (xen_swiotlb) - return 0; - - rc = xen_swiotlb_init(); - if (rc) - return rc; - - dma_ops = &xen_swiotlb_dma_ops; -#ifdef CONFIG_PCI - /* Make sure ACS will be enabled */ - pci_request_acs(); -#endif - - return 0; -} -EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); - -IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, - NULL, - pci_xen_swiotlb_init, - NULL); diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index e13b0b49fcdf..246d67dab510 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -131,6 +131,10 @@ static inline uint32_t get_fam15h_addr(u32 addr) static inline bool is_amd_pmu_msr(unsigned int msr) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return false; + if ((msr >= MSR_F15H_PERF_CTL && msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) || (msr >= MSR_K7_EVNTSEL0 && @@ -140,10 +144,15 @@ static inline bool is_amd_pmu_msr(unsigned int msr) return false; } -static int is_intel_pmu_msr(u32 msr_index, int *type, int *index) +static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index) { u32 msr_index_pmc; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && + boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR && + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) + return false; + switch (msr_index) { case MSR_CORE_PERF_FIXED_CTR_CTRL: case MSR_IA32_DS_AREA: @@ -290,48 +299,52 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) return false; } +static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read, + bool *emul) +{ + int type, index = 0; + + if (is_amd_pmu_msr(msr)) + *emul = xen_amd_pmu_emulate(msr, val, is_read); + else if (is_intel_pmu_msr(msr, &type, &index)) + *emul = xen_intel_pmu_emulate(msr, val, type, index, is_read); + else + return false; + + return true; +} + bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - if (is_amd_pmu_msr(msr)) { - if (!xen_amd_pmu_emulate(msr, val, 1)) - *val = native_read_msr_safe(msr, err); - return true; - } - } else { - int type, index; + bool emulated; - if (is_intel_pmu_msr(msr, &type, &index)) { - if (!xen_intel_pmu_emulate(msr, val, type, index, 1)) - *val = native_read_msr_safe(msr, err); - return true; - } + if (!pmu_msr_chk_emulated(msr, val, true, &emulated)) + return false; + + if (!emulated) { + *val = err ? native_read_msr_safe(msr, err) + : native_read_msr(msr); } - return false; + return true; } bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) { uint64_t val = ((uint64_t)high << 32) | low; + bool emulated; - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - if (is_amd_pmu_msr(msr)) { - if (!xen_amd_pmu_emulate(msr, &val, 0)) - *err = native_write_msr_safe(msr, low, high); - return true; - } - } else { - int type, index; + if (!pmu_msr_chk_emulated(msr, &val, false, &emulated)) + return false; - if (is_intel_pmu_msr(msr, &type, &index)) { - if (!xen_intel_pmu_emulate(msr, &val, type, index, 0)) - *err = native_write_msr_safe(msr, low, high); - return true; - } + if (!emulated) { + if (err) + *err = native_write_msr_safe(msr, low, high); + else + native_write_msr(msr, low, high); } - return false; + return true; } static unsigned long long xen_amd_read_pmc(int counter) @@ -413,34 +426,29 @@ int pmu_apic_update(uint32_t val) } /* perf callbacks */ -static int xen_is_in_guest(void) +static unsigned int xen_guest_state(void) { const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + unsigned int state = 0; if (!xenpmu_data) { pr_warn_once("%s: pmudata not initialized\n", __func__); - return 0; + return state; } if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF)) - return 0; - - return 1; -} + return state; -static int xen_is_user_mode(void) -{ - const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + state |= PERF_GUEST_ACTIVE; - if (!xenpmu_data) { - pr_warn_once("%s: pmudata not initialized\n", __func__); - return 0; + if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) { + if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER) + state |= PERF_GUEST_USER; + } else if (xenpmu_data->pmu.r.regs.cpl & 3) { + state |= PERF_GUEST_USER; } - if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) - return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER); - else - return !!(xenpmu_data->pmu.r.regs.cpl & 3); + return state; } static unsigned long xen_get_guest_ip(void) @@ -456,9 +464,8 @@ static unsigned long xen_get_guest_ip(void) } static struct perf_guest_info_callbacks xen_guest_cbs = { - .is_in_guest = xen_is_in_guest, - .is_user_mode = xen_is_user_mode, - .get_guest_ip = xen_get_guest_ip, + .state = xen_guest_state, + .get_ip = xen_get_guest_ip, }; /* Convert registers from Xen's format to Linux' */ @@ -512,10 +519,7 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) return ret; } -bool is_xen_pmu(int cpu) -{ - return (get_xenpmu_data() != NULL); -} +bool is_xen_pmu; void xen_pmu_init(int cpu) { @@ -526,7 +530,7 @@ void xen_pmu_init(int cpu) BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE); - if (xen_hvm_domain()) + if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu)) return; xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL); @@ -547,7 +551,8 @@ void xen_pmu_init(int cpu) per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data; per_cpu(xenpmu_shared, cpu).flags = 0; - if (cpu == 0) { + if (!is_xen_pmu) { + is_xen_pmu = true; perf_register_guest_info_callbacks(&xen_guest_cbs); xen_pmu_arch_init(); } diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h index 0e83a160589b..65c58894fc79 100644 --- a/arch/x86/xen/pmu.h +++ b/arch/x86/xen/pmu.h @@ -4,6 +4,8 @@ #include <xen/interface/xenpmu.h> +extern bool is_xen_pmu; + irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); #ifdef CONFIG_XEN_HAVE_VPMU void xen_pmu_init(int cpu); @@ -12,7 +14,6 @@ void xen_pmu_finish(int cpu); static inline void xen_pmu_init(int cpu) {} static inline void xen_pmu_finish(int cpu) {} #endif -bool is_xen_pmu(int cpu); bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); int pmu_apic_update(uint32_t reg); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index af216feb63d9..4f4309500559 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -719,7 +719,7 @@ static void __init xen_reserve_xen_mfnlist(void) } /** - * machine_specific_memory_setup - Hook for machine specific memory setup. + * xen_memory_setup - Hook for machine specific memory setup. **/ char * __init xen_memory_setup(void) { @@ -910,36 +910,25 @@ static int register_callback(unsigned type, const void *func) void xen_enable_sysenter(void) { - int ret; - unsigned sysenter_feature; - - sysenter_feature = X86_FEATURE_SYSENTER32; - - if (!boot_cpu_has(sysenter_feature)) - return; - - ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); - if(ret != 0) - setup_clear_cpu_cap(sysenter_feature); + if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) && + register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat)) + setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); } void xen_enable_syscall(void) { int ret; - ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); if (ret != 0) { printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); /* Pretty fatal; 64-bit userspace has no other mechanism for syscalls. */ } - if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { - ret = register_callback(CALLBACKTYPE_syscall32, - xen_syscall32_target); - if (ret != 0) - setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); - } + if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) && + register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat)) + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } static void __init xen_pvmmu_arch_setup(void) diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 6ff3c887e0b9..b70afdff419c 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -20,6 +20,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) xen_vcpu_setup(0); /* + * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS. + * Refer to comments in xen_hvm_init_time_ops(). + */ + xen_hvm_init_time_ops(); + + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. Hence we need to set this up * the core kernel is being patched. Otherwise we will have only diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 6a8f3b53ab83..480be82e9b7b 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -129,7 +129,7 @@ int xen_smp_intr_init_pv(unsigned int cpu) per_cpu(xen_irq_work, cpu).irq = rc; per_cpu(xen_irq_work, cpu).name = callfunc_name; - if (is_xen_pmu(cpu)) { + if (is_xen_pmu) { pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, xen_pmu_irq_handler, @@ -148,28 +148,12 @@ int xen_smp_intr_init_pv(unsigned int cpu) return rc; } -static void __init xen_fill_possible_map(void) -{ - int i, rc; - - if (xen_initial_domain()) - return; - - for (i = 0; i < nr_cpu_ids; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) { - num_processors++; - set_cpu_possible(i, true); - } - } -} - -static void __init xen_filter_cpu_maps(void) +static void __init _get_smp_config(unsigned int early) { int i, rc; unsigned int subtract = 0; - if (!xen_initial_domain()) + if (early) return; num_processors = 0; @@ -195,7 +179,7 @@ static void __init xen_filter_cpu_maps(void) * hypercall to expand the max number of VCPUs an already * running guest has. So cap it up to X. */ if (subtract) - nr_cpu_ids = nr_cpu_ids - subtract; + set_nr_cpu_ids(nr_cpu_ids - subtract); #endif } @@ -210,7 +194,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) * sure the old memory can be recycled. */ make_lowmem_page_readwrite(xen_initial_gdt); - xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); /* @@ -277,8 +260,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); - if (ctxt == NULL) + if (ctxt == NULL) { + cpumask_clear_cpu(cpu, xen_cpu_initialized_map); + cpumask_clear_cpu(cpu, cpu_callout_mask); return -ENOMEM; + } gdt = get_cpu_gdt_rw(cpu); @@ -476,5 +462,8 @@ static const struct smp_ops xen_smp_ops __initconst = { void __init xen_smp_init(void) { smp_ops = xen_smp_ops; - xen_fill_possible_map(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = _get_smp_config; } diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c index 9d548b0c772f..0c4f7554b7cc 100644 --- a/arch/x86/xen/suspend_hvm.c +++ b/arch/x86/xen/suspend_hvm.c @@ -5,6 +5,7 @@ #include <xen/hvm.h> #include <xen/features.h> #include <xen/interface/features.h> +#include <xen/events.h> #include "xen-ops.h" @@ -14,6 +15,13 @@ void xen_hvm_post_suspend(int suspend_cancelled) xen_hvm_init_shared_info(); xen_vcpu_restore(); } - xen_setup_callback_vector(); + if (xen_percpu_upcall) { + unsigned int cpu; + + for_each_online_cpu(cpu) + BUG_ON(xen_set_upcall_vector(cpu)); + } else { + xen_setup_callback_vector(); + } xen_unplug_emulated_devices(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index d9c945ee1100..9ef0a5cca96e 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -558,6 +558,11 @@ static void xen_hvm_setup_cpu_clockevents(void) void __init xen_hvm_init_time_ops(void) { + static bool hvm_time_initialized; + + if (hvm_time_initialized) + return; + /* * vector callback is needed otherwise we cannot receive interrupts * on cpu > 0 and at this point we don't know how many cpus are @@ -567,7 +572,22 @@ void __init xen_hvm_init_time_ops(void) return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { - pr_info("Xen doesn't support pvclock on HVM, disable pv timer"); + pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer"); + return; + } + + /* + * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'. + * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest + * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access + * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic. + * + * The xen_hvm_init_time_ops() should be called again later after + * __this_cpu_read(xen_vcpu) is available. + */ + if (!__this_cpu_read(xen_vcpu)) { + pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n", + xen_vcpu_nr(0)); return; } @@ -577,6 +597,8 @@ void __init xen_hvm_init_time_ops(void) x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; x86_platform.set_wallclock = xen_set_wallclock; + + hvm_time_initialized = true; } #endif diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index e336f223f7f4..14ea32e734d5 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -57,16 +57,20 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; + if (size >= offsetof(struct dom0_vga_console_info, + u.vesa_lfb.ext_lfb_base) + + sizeof(info->u.vesa_lfb.ext_lfb_base) + && info->u.vesa_lfb.ext_lfb_base) { + screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base; + screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + } + if (info->video_type == XEN_VGATYPE_EFI_LFB) { screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; break; } if (size >= offsetof(struct dom0_vga_console_info, - u.vesa_lfb.gbl_caps) - + sizeof(info->u.vesa_lfb.gbl_caps)) - screen_info->capabilities = info->u.vesa_lfb.gbl_caps; - if (size >= offsetof(struct dom0_vga_console_info, u.vesa_lfb.mode_attrs) + sizeof(info->u.vesa_lfb.mode_attrs)) screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 444d824775f6..6b4fdf6b9542 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -29,7 +29,7 @@ */ SYM_FUNC_START(xen_irq_disable_direct) movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - ret + RET SYM_FUNC_END(xen_irq_disable_direct) /* @@ -58,7 +58,7 @@ SYM_FUNC_START(check_events) pop %rcx pop %rax FRAME_END - ret + RET SYM_FUNC_END(check_events) /* @@ -84,7 +84,7 @@ SYM_FUNC_START(xen_irq_enable_direct) call check_events 1: FRAME_END - ret + RET SYM_FUNC_END(xen_irq_enable_direct) /* @@ -100,7 +100,7 @@ SYM_FUNC_START(xen_save_fl_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah, %ah - ret + RET SYM_FUNC_END(xen_save_fl_direct) SYM_FUNC_START(xen_read_cr2) @@ -108,20 +108,21 @@ SYM_FUNC_START(xen_read_cr2) _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX FRAME_END - ret + RET SYM_FUNC_END(xen_read_cr2); SYM_FUNC_START(xen_read_cr2_direct) FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX FRAME_END - ret + RET SYM_FUNC_END(xen_read_cr2_direct); .popsection .macro xen_pv_trap name SYM_CODE_START(xen_\name) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY + ENDBR pop %rcx pop %r11 jmp \name @@ -147,6 +148,9 @@ xen_pv_trap asm_exc_page_fault xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_alignment_check +#ifdef CONFIG_X86_KERNEL_IBT +xen_pv_trap asm_exc_control_protection +#endif #ifdef CONFIG_X86_MCE xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ @@ -162,6 +166,7 @@ SYM_CODE_START(xen_early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS UNWIND_HINT_EMPTY + ENDBR pop %rcx pop %r11 jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE @@ -189,6 +194,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 */ SYM_CODE_START(xen_iret) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR pushq $0 jmp hypercall_iret SYM_CODE_END(xen_iret) @@ -228,8 +234,9 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) */ /* Normal 64-bit system call target */ -SYM_CODE_START(xen_syscall_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_64) + UNWIND_HINT_ENTRY + ENDBR popq %rcx popq %r11 @@ -242,13 +249,14 @@ SYM_CODE_START(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -SYM_CODE_END(xen_syscall_target) +SYM_CODE_END(xen_entry_SYSCALL_64) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -SYM_CODE_START(xen_syscall32_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_compat) + UNWIND_HINT_ENTRY + ENDBR popq %rcx popq %r11 @@ -261,11 +269,12 @@ SYM_CODE_START(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ -SYM_CODE_START(xen_sysenter_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY + ENDBR /* * NB: Xen is polite and clears TF from EFLAGS for us. This means * that we don't need to guard against single step exceptions here. @@ -282,18 +291,19 @@ SYM_CODE_START(xen_sysenter_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSENTER_compat_after_hwframe -SYM_CODE_END(xen_sysenter_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) #else /* !CONFIG_IA32_EMULATION */ -SYM_CODE_START(xen_syscall32_target) -SYM_CODE_START(xen_sysenter_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_compat) +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY + ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -SYM_CODE_END(xen_sysenter_target) -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) +SYM_CODE_END(xen_entry_SYSCALL_compat) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 6a64496edefb..ffaa62167f6e 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -25,8 +25,13 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC - .skip 31, 0x90 + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE ret + /* + * Xen will write the hypercall page, and sort out ENDBR. + */ + .skip 31, 0xcc .endr #define HYPERCALL(n) \ @@ -41,17 +46,9 @@ SYM_CODE_END(hypercall_page) __INIT SYM_CODE_START(startup_xen) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR cld - /* Clear .bss */ - xor %eax,%eax - mov $__bss_start, %rdi - mov $__bss_stop, %rcx - sub %rdi, %rcx - shr $3, %rcx - rep stosq - - mov %rsi, xen_start_info mov initial_stack(%rip), %rsp /* Set up %gs. @@ -66,6 +63,7 @@ SYM_CODE_START(startup_xen) cdq wrmsr + mov %rsi, %rdi call xen_start_kernel SYM_CODE_END(startup_xen) __FINIT @@ -74,6 +72,7 @@ SYM_CODE_END(startup_xen) .pushsection .text SYM_CODE_START(asm_cpu_bringup_and_idle) UNWIND_HINT_EMPTY + ENDBR call cpu_bringup_and_idle SYM_CODE_END(asm_cpu_bringup_and_idle) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index fd0fec6e92f4..9a8bb972193d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,10 +10,10 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_failsafe_callback[]; -void xen_sysenter_target(void); +void xen_entry_SYSENTER_compat(void); #ifdef CONFIG_X86_64 -void xen_syscall_target(void); -void xen_syscall32_target(void); +void xen_entry_SYSCALL_64(void); +void xen_entry_SYSCALL_compat(void); #endif extern void *xen_initial_gdt; |