Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/Kconfig10
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/apic.c2
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/enlighten_hvm.c50
-rw-r--r--arch/x86/xen/enlighten_pv.c135
-rw-r--r--arch/x86/xen/mmu_pv.c1
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c96
-rw-r--r--arch/x86/xen/pmu.c113
-rw-r--r--arch/x86/xen/pmu.h3
-rw-r--r--arch/x86/xen/setup.c27
-rw-r--r--arch/x86/xen/smp_hvm.c6
-rw-r--r--arch/x86/xen/smp_pv.c35
-rw-r--r--arch/x86/xen/suspend_hvm.c10
-rw-r--r--arch/x86/xen/time.c24
-rw-r--r--arch/x86/xen/vga.c12
-rw-r--r--arch/x86/xen/xen-asm.S52
-rw-r--r--arch/x86/xen/xen-head.S19
-rw-r--r--arch/x86/xen/xen-ops.h6
19 files changed, 310 insertions, 295 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 6bcd3d8ca6ac..9b1ec5d8c99c 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -23,6 +23,7 @@ config XEN_PV
select PARAVIRT_XXL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
+ select GUEST_PERF_EVENTS
help
Support running as a Xen PV guest.
@@ -91,3 +92,12 @@ config XEN_DOM0
select X86_X2APIC if XEN_PVH && X86_64
help
Support running as a Xen Dom0 guest.
+
+config XEN_PV_MSR_SAFE
+ bool "Always use safe MSR accesses in PV guests"
+ default y
+ depends on XEN_PV
+ help
+ Use safe (not faulting) MSR access functions even if the MSR access
+ should not fault anyway.
+ The default can be changed by using the "xen_msr_safe" boot parameter.
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 4953260e281c..3c5b52fbe4a7 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -47,6 +47,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
obj-$(CONFIG_XEN_PV_DOM0) += vga.o
-obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
-
obj-$(CONFIG_XEN_EFI) += efi.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 0d46cc283cf5..62d34b6611c5 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -51,7 +51,7 @@ static u32 xen_apic_read(u32 reg)
.interface_version = XENPF_INTERFACE_VERSION,
.u.pcpu_info.xen_cpuid = 0,
};
- int ret = 0;
+ int ret;
/* Shouldn't need this as APIC is turned off for PV, and we only
* get called on the bootup processor. But just in case. */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 30c6e986a6cd..b8db2148c07d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(xen_start_info);
struct shared_info xen_dummy_shared_info;
-__read_mostly int xen_have_vector_callback;
+__read_mostly bool xen_have_vector_callback = true;
EXPORT_SYMBOL_GPL(xen_have_vector_callback);
/*
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 42300941ec29..c1cd28e915a3 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -4,11 +4,15 @@
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <linux/memblock.h>
+#include <linux/virtio_anchor.h>
#include <xen/features.h>
#include <xen/events.h>
+#include <xen/hvm.h>
+#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/memory.h>
+#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/io_apic.h>
@@ -29,6 +33,9 @@
static unsigned long shared_info_pfn;
+__ro_after_init bool xen_percpu_upcall;
+EXPORT_SYMBOL_GPL(xen_percpu_upcall);
+
void xen_hvm_init_shared_info(void)
{
struct xen_add_to_physmap xatp;
@@ -124,6 +131,9 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback)
{
struct pt_regs *old_regs = set_irq_regs(regs);
+ if (xen_percpu_upcall)
+ ack_APIC_irq();
+
inc_irq_stat(irq_hv_callback_count);
xen_hvm_evtchn_do_upcall();
@@ -167,6 +177,15 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
if (!xen_have_vector_callback)
return 0;
+ if (xen_percpu_upcall) {
+ rc = xen_set_upcall_vector(cpu);
+ if (rc) {
+ WARN(1, "HVMOP_set_evtchn_upcall_vector"
+ " for CPU %d failed: %d\n", cpu, rc);
+ return rc;
+ }
+ }
+
if (xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);
@@ -184,17 +203,17 @@ static int xen_cpu_dead_hvm(unsigned int cpu)
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_teardown_timer(cpu);
-
- return 0;
+ return 0;
}
-static bool no_vector_callback __initdata;
-
static void __init xen_hvm_guest_init(void)
{
if (xen_pv_domain())
return;
+ if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT))
+ virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc);
+
init_hvm_pv_info();
reserve_shared_info();
@@ -209,9 +228,6 @@ static void __init xen_hvm_guest_init(void)
xen_panic_handler_init();
- if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector))
- xen_have_vector_callback = 1;
-
xen_hvm_smp_init();
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm));
xen_unplug_emulated_devices();
@@ -237,20 +253,19 @@ early_param("xen_nopv", xen_parse_nopv);
static __init int xen_parse_no_vector_callback(char *arg)
{
- no_vector_callback = true;
+ xen_have_vector_callback = false;
return 0;
}
early_param("xen_no_vector_callback", xen_parse_no_vector_callback);
-bool __init xen_hvm_need_lapic(void)
+static __init bool xen_x2apic_available(void)
{
- if (xen_pv_domain())
- return false;
- if (!xen_hvm_domain())
- return false;
- if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
- return false;
- return true;
+ return x2apic_supported();
+}
+
+static bool __init msi_ext_dest_id(void)
+{
+ return cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_EXT_DEST_ID;
}
static __init void xen_hvm_guest_late_init(void)
@@ -312,9 +327,10 @@ struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
.detect = xen_platform_hvm,
.type = X86_HYPER_XEN_HVM,
.init.init_platform = xen_hvm_guest_init,
- .init.x2apic_available = xen_x2apic_para_available,
+ .init.x2apic_available = xen_x2apic_available,
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
.init.guest_late_init = xen_hvm_guest_late_init,
+ .init.msi_ext_dest_id = msi_ext_dest_id,
.runtime.pin_vcpu = xen_pin_vcpu,
.ignore_nopv = true,
};
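
For context on the new per-vCPU upcall path: a minimal sketch of the shape of xen_set_upcall_vector(), which this file now calls for each CPU. The real helper lives outside arch/x86/xen (in the Xen event-channel core); the body below is an assumption based on the public HVMOP_set_evtchn_upcall_vector interface, not a hunk from this diff:

/* Sketch (assumption): ask Xen to deliver event-channel upcalls for
 * this vCPU on a regular LAPIC vector. Because the vector is then
 * delivered like any other APIC interrupt, the IDT handler above must
 * ack_APIC_irq() when xen_percpu_upcall is set.
 */
int xen_set_upcall_vector(unsigned int cpu)
{
	struct xen_hvm_evtchn_upcall_vector op = {
		.vcpu   = per_cpu(xen_vcpu_id, cpu),
		.vector = HYPERVISOR_CALLBACK_VECTOR,
	};

	return HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op);
}
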
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5004feb16783..f82857e48815 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -30,7 +30,8 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
-#include <linux/objtool.h>
+#include <linux/reboot.h>
+#include <linux/virtio_anchor.h>
#include <xen/xen.h>
#include <xen/events.h>
@@ -107,8 +108,22 @@ struct tls_descs {
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
+static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE);
+
+static int __init parse_xen_msr_safe(char *str)
+{
+ if (str)
+ return strtobool(str, &xen_msr_safe);
+ return -EINVAL;
+}
+early_param("xen_msr_safe", parse_xen_msr_safe);
+
static void __init xen_pv_init_platform(void)
{
+ /* PV guests can't operate virtio devices without grants. */
+ if (IS_ENABLED(CONFIG_XEN_VIRTIO))
+ virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc);
+
populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
@@ -165,7 +180,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
*bx &= maskebx;
}
-STACK_FRAME_NON_STANDARD(xen_cpuid); /* XEN_EMULATE_PREFIX */
static bool __init xen_check_mwait(void)
{
@@ -624,6 +638,9 @@ static struct trap_array_entry trap_array[] = {
TRAP_ENTRY(exc_coprocessor_error, false ),
TRAP_ENTRY(exc_alignment_check, false ),
TRAP_ENTRY(exc_simd_coprocessor_error, false ),
+#ifdef CONFIG_X86_KERNEL_IBT
+ TRAP_ENTRY(exc_control_protection, false ),
+#endif
};
static bool __ref get_trap_addr(void **addr, unsigned int ist)
@@ -758,6 +775,7 @@ static void xen_load_idt(const struct desc_ptr *desc)
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
+ static const struct trap_info zero = { };
unsigned out;
trace_xen_cpu_load_idt(desc);
@@ -767,7 +785,7 @@ static void xen_load_idt(const struct desc_ptr *desc)
memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
out = xen_convert_trap_info(desc, traps, false);
- memset(&traps[out], 0, sizeof(traps[0]));
+ traps[out] = zero;
xen_mc_flush();
if (HYPERVISOR_set_trap_table(traps))
@@ -909,14 +927,18 @@ static void xen_write_cr4(unsigned long cr4)
native_write_cr4(cr4);
}
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
+static u64 xen_do_read_msr(unsigned int msr, int *err)
{
- u64 val;
+ u64 val = 0; /* Avoid uninitialized value for safe variant. */
if (pmu_msr_read(msr, &val, err))
return val;
- val = native_read_msr_safe(msr, err);
+ if (err)
+ val = native_read_msr_safe(msr, err);
+ else
+ val = native_read_msr(msr);
+
switch (msr) {
case MSR_IA32_APICBASE:
val &= ~X2APIC_ENABLE;
@@ -925,23 +947,39 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
return val;
}
-static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+static void set_seg(unsigned int which, unsigned int low, unsigned int high,
+ int *err)
{
- int ret;
- unsigned int which;
- u64 base;
+ u64 base = ((u64)high << 32) | low;
- ret = 0;
+ if (HYPERVISOR_set_segment_base(which, base) == 0)
+ return;
+ if (err)
+ *err = -EIO;
+ else
+ WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
+}
+
+/*
+ * Support write_msr_safe() and write_msr() semantics.
+ * With err == NULL write_msr() semantics are selected.
+ * Supplying an err pointer requires err to be pre-initialized with 0.
+ */
+static void xen_do_write_msr(unsigned int msr, unsigned int low,
+ unsigned int high, int *err)
+{
switch (msr) {
- case MSR_FS_BASE: which = SEGBASE_FS; goto set;
- case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
- case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
-
- set:
- base = ((u64)high << 32) | low;
- if (HYPERVISOR_set_segment_base(which, base) != 0)
- ret = -EIO;
+ case MSR_FS_BASE:
+ set_seg(SEGBASE_FS, low, high, err);
+ break;
+
+ case MSR_KERNEL_GS_BASE:
+ set_seg(SEGBASE_GS_USER, low, high, err);
+ break;
+
+ case MSR_GS_BASE:
+ set_seg(SEGBASE_GS_KERNEL, low, high, err);
break;
case MSR_STAR:
@@ -957,31 +995,42 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
break;
default:
- if (!pmu_msr_write(msr, low, high, &ret))
- ret = native_write_msr_safe(msr, low, high);
+ if (!pmu_msr_write(msr, low, high, err)) {
+ if (err)
+ *err = native_write_msr_safe(msr, low, high);
+ else
+ native_write_msr(msr, low, high);
+ }
}
+}
+
+static u64 xen_read_msr_safe(unsigned int msr, int *err)
+{
+ return xen_do_read_msr(msr, err);
+}
+
+static int xen_write_msr_safe(unsigned int msr, unsigned int low,
+ unsigned int high)
+{
+ int err = 0;
- return ret;
+ xen_do_write_msr(msr, low, high, &err);
+
+ return err;
}
static u64 xen_read_msr(unsigned int msr)
{
- /*
- * This will silently swallow a #GP from RDMSR. It may be worth
- * changing that.
- */
int err;
- return xen_read_msr_safe(msr, &err);
+ return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
}
static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
{
- /*
- * This will silently swallow a #GP from WRMSR. It may be worth
- * changing that.
- */
- xen_write_msr_safe(msr, low, high);
+ int err;
+
+ xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
}
/* This is called once we have the cpu_possible_mask */
@@ -1068,8 +1117,7 @@ static void xen_machine_halt(void)
static void xen_machine_power_off(void)
{
- if (pm_power_off)
- pm_power_off();
+ do_kernel_power_off();
xen_reboot(SHUTDOWN_poweroff);
}
@@ -1177,16 +1225,26 @@ static void __init xen_domu_set_legacy_features(void)
x86_platform.legacy.rtc = 0;
}
+extern void early_xen_iret_patch(void);
+
/* First C function to be called on Xen boot */
-asmlinkage __visible void __init xen_start_kernel(void)
+asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
{
struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
int rc;
- if (!xen_start_info)
+ if (!si)
return;
+ clear_bss();
+
+ xen_start_info = si;
+
+ __text_gen_insn(&early_xen_iret_patch,
+ JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret,
+ JMP32_INSN_SIZE);
+
xen_domain_type = XEN_PV_DOMAIN;
xen_start_flags = xen_start_info->flags;
@@ -1195,7 +1253,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_ops.cpu = xen_cpu_ops.cpu;
- paravirt_iret = xen_iret;
xen_init_irq_ops();
/*
@@ -1341,10 +1398,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_acpi_sleep_register();
- /* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
-
xen_boot_params_init_edd();
#ifdef CONFIG_ACPI
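
The MSR rework above funnels both paravirt hooks through one worker, with the error pointer selecting the semantics: err != NULL gives read/write_msr_safe() behaviour (the fault is reported to the caller), err == NULL gives plain read/write_msr() behaviour (subject to the xen_msr_safe override). A standalone C illustration of that single-worker pattern (plain userspace code, not kernel code):

#include <stdio.h>

/* One worker serves both semantics:
 *   err != NULL -> "safe": failures are stored for the caller,
 *   err == NULL -> "plain": failures are reported loudly here.
 */
static unsigned long long do_read(unsigned int reg, int *err)
{
	if (reg == 0xbad) {		/* simulate a faulting access */
		if (err)
			*err = -5;	/* hand the error to the caller */
		else
			fprintf(stderr, "read of %#x faulted\n", reg);
		return 0;
	}
	return 0x1234;
}

static unsigned long long read_safe(unsigned int reg, int *err)
{
	return do_read(reg, err);
}

static unsigned long long read_plain(unsigned int reg)
{
	return do_read(reg, NULL);
}

int main(void)
{
	int err = 0;
	unsigned long long v = read_safe(0xbad, &err);

	printf("safe:  val=%#llx err=%d\n", v, err);
	printf("plain: val=%#llx\n", read_plain(0x10));
	return 0;
}
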
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 00354866921b..ee29fb558f2e 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -80,6 +80,7 @@
#include <xen/interface/version.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
+#include <xen/swiotlb-xen.h>
#include "multicalls.h"
#include "mmu.h"
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
deleted file mode 100644
index 46df59aeaa06..000000000000
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/* Glue code to lib/swiotlb-xen.c */
-
-#include <linux/dma-map-ops.h>
-#include <linux/pci.h>
-#include <xen/swiotlb-xen.h>
-
-#include <asm/xen/hypervisor.h>
-#include <xen/xen.h>
-#include <asm/iommu_table.h>
-
-
-#include <asm/xen/swiotlb-xen.h>
-#ifdef CONFIG_X86_64
-#include <asm/iommu.h>
-#include <asm/dma.h>
-#endif
-#include <linux/export.h>
-
-static int xen_swiotlb __read_mostly;
-
-/*
- * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary
- *
- * This returns non-zero if we are forced to use xen_swiotlb (by the boot
- * option).
- */
-int __init pci_xen_swiotlb_detect(void)
-{
-
- if (!xen_pv_domain())
- return 0;
-
- /* If running as PV guest, either iommu=soft, or swiotlb=force will
- * activate this IOMMU. If running as PV privileged, activate it
- * irregardless.
- */
- if (xen_initial_domain() || swiotlb || swiotlb_force == SWIOTLB_FORCE)
- xen_swiotlb = 1;
-
- /* If we are running under Xen, we MUST disable the native SWIOTLB.
- * Don't worry about swiotlb_force flag activating the native, as
- * the 'swiotlb' flag is the only one turning it on. */
- swiotlb = 0;
-
-#ifdef CONFIG_X86_64
- /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0
- * (so no iommu=X command line over-writes).
- * Considering that PV guests do not want the *native SWIOTLB* but
- * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here.
- */
- if (max_pfn > MAX_DMA32_PFN)
- no_iommu = 1;
-#endif
- return xen_swiotlb;
-}
-
-static void __init pci_xen_swiotlb_init(void)
-{
- if (xen_swiotlb) {
- xen_swiotlb_init_early();
- dma_ops = &xen_swiotlb_dma_ops;
-
-#ifdef CONFIG_PCI
- /* Make sure ACS will be enabled */
- pci_request_acs();
-#endif
- }
-}
-
-int pci_xen_swiotlb_init_late(void)
-{
- int rc;
-
- if (xen_swiotlb)
- return 0;
-
- rc = xen_swiotlb_init();
- if (rc)
- return rc;
-
- dma_ops = &xen_swiotlb_dma_ops;
-#ifdef CONFIG_PCI
- /* Make sure ACS will be enabled */
- pci_request_acs();
-#endif
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
-
-IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
- NULL,
- pci_xen_swiotlb_init,
- NULL);
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index e13b0b49fcdf..246d67dab510 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -131,6 +131,10 @@ static inline uint32_t get_fam15h_addr(u32 addr)
static inline bool is_amd_pmu_msr(unsigned int msr)
{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ return false;
+
if ((msr >= MSR_F15H_PERF_CTL &&
msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
(msr >= MSR_K7_EVNTSEL0 &&
@@ -140,10 +144,15 @@ static inline bool is_amd_pmu_msr(unsigned int msr)
return false;
}
-static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
+static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
u32 msr_index_pmc;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
+ return false;
+
switch (msr_index) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
case MSR_IA32_DS_AREA:
@@ -290,48 +299,52 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
return false;
}
+static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read,
+ bool *emul)
+{
+ int type, index = 0;
+
+ if (is_amd_pmu_msr(msr))
+ *emul = xen_amd_pmu_emulate(msr, val, is_read);
+ else if (is_intel_pmu_msr(msr, &type, &index))
+ *emul = xen_intel_pmu_emulate(msr, val, type, index, is_read);
+ else
+ return false;
+
+ return true;
+}
+
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
- if (is_amd_pmu_msr(msr)) {
- if (!xen_amd_pmu_emulate(msr, val, 1))
- *val = native_read_msr_safe(msr, err);
- return true;
- }
- } else {
- int type, index;
+ bool emulated;
- if (is_intel_pmu_msr(msr, &type, &index)) {
- if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
- *val = native_read_msr_safe(msr, err);
- return true;
- }
+ if (!pmu_msr_chk_emulated(msr, val, true, &emulated))
+ return false;
+
+ if (!emulated) {
+ *val = err ? native_read_msr_safe(msr, err)
+ : native_read_msr(msr);
}
- return false;
+ return true;
}
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
uint64_t val = ((uint64_t)high << 32) | low;
+ bool emulated;
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
- if (is_amd_pmu_msr(msr)) {
- if (!xen_amd_pmu_emulate(msr, &val, 0))
- *err = native_write_msr_safe(msr, low, high);
- return true;
- }
- } else {
- int type, index;
+ if (!pmu_msr_chk_emulated(msr, &val, false, &emulated))
+ return false;
- if (is_intel_pmu_msr(msr, &type, &index)) {
- if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
- *err = native_write_msr_safe(msr, low, high);
- return true;
- }
+ if (!emulated) {
+ if (err)
+ *err = native_write_msr_safe(msr, low, high);
+ else
+ native_write_msr(msr, low, high);
}
- return false;
+ return true;
}
static unsigned long long xen_amd_read_pmc(int counter)
@@ -413,34 +426,29 @@ int pmu_apic_update(uint32_t val)
}
/* perf callbacks */
-static int xen_is_in_guest(void)
+static unsigned int xen_guest_state(void)
{
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ unsigned int state = 0;
if (!xenpmu_data) {
pr_warn_once("%s: pmudata not initialized\n", __func__);
- return 0;
+ return state;
}
if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
- return 0;
-
- return 1;
-}
+ return state;
-static int xen_is_user_mode(void)
-{
- const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+ state |= PERF_GUEST_ACTIVE;
- if (!xenpmu_data) {
- pr_warn_once("%s: pmudata not initialized\n", __func__);
- return 0;
+ if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) {
+ if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER)
+ state |= PERF_GUEST_USER;
+ } else if (xenpmu_data->pmu.r.regs.cpl & 3) {
+ state |= PERF_GUEST_USER;
}
- if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
- return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
- else
- return !!(xenpmu_data->pmu.r.regs.cpl & 3);
+ return state;
}
static unsigned long xen_get_guest_ip(void)
@@ -456,9 +464,8 @@ static unsigned long xen_get_guest_ip(void)
}
static struct perf_guest_info_callbacks xen_guest_cbs = {
- .is_in_guest = xen_is_in_guest,
- .is_user_mode = xen_is_user_mode,
- .get_guest_ip = xen_get_guest_ip,
+ .state = xen_guest_state,
+ .get_ip = xen_get_guest_ip,
};
/* Convert registers from Xen's format to Linux' */
@@ -512,10 +519,7 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
return ret;
}
-bool is_xen_pmu(int cpu)
-{
- return (get_xenpmu_data() != NULL);
-}
+bool is_xen_pmu;
void xen_pmu_init(int cpu)
{
@@ -526,7 +530,7 @@ void xen_pmu_init(int cpu)
BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
- if (xen_hvm_domain())
+ if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu))
return;
xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
@@ -547,7 +551,8 @@ void xen_pmu_init(int cpu)
per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
per_cpu(xenpmu_shared, cpu).flags = 0;
- if (cpu == 0) {
+ if (!is_xen_pmu) {
+ is_xen_pmu = true;
perf_register_guest_info_callbacks(&xen_guest_cbs);
xen_pmu_arch_init();
}
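
The perf callback change tracks the kernel-wide move from three boolean callbacks to a single .state callback returning PERF_GUEST_* flags. A small standalone sketch of how a consumer classifies a sample from such a state word (the flag values mirror linux/perf_event.h; the classification logic is a simplified assumption, not the exact perf core code):

#include <stdio.h>

#define PERF_GUEST_ACTIVE 0x01
#define PERF_GUEST_USER   0x02

/* Stand-in for xen_guest_state() above: report "active, user mode". */
static unsigned int guest_state(void)
{
	return PERF_GUEST_ACTIVE | PERF_GUEST_USER;
}

int main(void)
{
	unsigned int state = guest_state();

	/* Roughly how perf core tags a sample (simplified assumption): */
	if (state & PERF_GUEST_ACTIVE)
		printf("guest sample, %s mode\n",
		       (state & PERF_GUEST_USER) ? "user" : "kernel");
	else
		printf("host sample\n");
	return 0;
}
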
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
index 0e83a160589b..65c58894fc79 100644
--- a/arch/x86/xen/pmu.h
+++ b/arch/x86/xen/pmu.h
@@ -4,6 +4,8 @@
#include <xen/interface/xenpmu.h>
+extern bool is_xen_pmu;
+
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
#ifdef CONFIG_XEN_HAVE_VPMU
void xen_pmu_init(int cpu);
@@ -12,7 +14,6 @@ void xen_pmu_finish(int cpu);
static inline void xen_pmu_init(int cpu) {}
static inline void xen_pmu_finish(int cpu) {}
#endif
-bool is_xen_pmu(int cpu);
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
int pmu_apic_update(uint32_t reg);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index af216feb63d9..4f4309500559 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -719,7 +719,7 @@ static void __init xen_reserve_xen_mfnlist(void)
}
/**
- * machine_specific_memory_setup - Hook for machine specific memory setup.
+ * xen_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
@@ -910,36 +910,25 @@ static int register_callback(unsigned type, const void *func)
void xen_enable_sysenter(void)
{
- int ret;
- unsigned sysenter_feature;
-
- sysenter_feature = X86_FEATURE_SYSENTER32;
-
- if (!boot_cpu_has(sysenter_feature))
- return;
-
- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
- if(ret != 0)
- setup_clear_cpu_cap(sysenter_feature);
+ if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) &&
+ register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat))
+ setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
}
void xen_enable_syscall(void)
{
int ret;
- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
+ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
if (ret != 0) {
printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
/* Pretty fatal; 64-bit userspace has no other
mechanism for syscalls. */
}
- if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
- ret = register_callback(CALLBACKTYPE_syscall32,
- xen_syscall32_target);
- if (ret != 0)
- setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
- }
+ if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) &&
+ register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat))
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
static void __init xen_pvmmu_arch_setup(void)
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index 6ff3c887e0b9..b70afdff419c 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -20,6 +20,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)
xen_vcpu_setup(0);
/*
+ * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS.
+ * Refer to comments in xen_hvm_init_time_ops().
+ */
+ xen_hvm_init_time_ops();
+
+ /*
* The alternative logic (which patches the unlock/lock) runs before
* the smp bootup up code is activated. Hence we need to set this up
* the core kernel is being patched. Otherwise we will have only
* modules patched but not core kernel infrastructure.
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 6a8f3b53ab83..480be82e9b7b 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -129,7 +129,7 @@ int xen_smp_intr_init_pv(unsigned int cpu)
per_cpu(xen_irq_work, cpu).irq = rc;
per_cpu(xen_irq_work, cpu).name = callfunc_name;
- if (is_xen_pmu(cpu)) {
+ if (is_xen_pmu) {
pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
xen_pmu_irq_handler,
@@ -148,28 +148,12 @@ int xen_smp_intr_init_pv(unsigned int cpu)
return rc;
}
-static void __init xen_fill_possible_map(void)
-{
- int i, rc;
-
- if (xen_initial_domain())
- return;
-
- for (i = 0; i < nr_cpu_ids; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0) {
- num_processors++;
- set_cpu_possible(i, true);
- }
- }
-}
-
-static void __init xen_filter_cpu_maps(void)
+static void __init _get_smp_config(unsigned int early)
{
int i, rc;
unsigned int subtract = 0;
- if (!xen_initial_domain())
+ if (early)
return;
num_processors = 0;
@@ -195,7 +179,7 @@ static void __init xen_filter_cpu_maps(void)
* hypercall to expand the max number of VCPUs an already
* running guest has. So cap it up to X. */
if (subtract)
- nr_cpu_ids = nr_cpu_ids - subtract;
+ set_nr_cpu_ids(nr_cpu_ids - subtract);
#endif
}
@@ -210,7 +194,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
* sure the old memory can be recycled. */
make_lowmem_page_readwrite(xen_initial_gdt);
- xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
/*
@@ -277,8 +260,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (ctxt == NULL)
+ if (ctxt == NULL) {
+ cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
+ cpumask_clear_cpu(cpu, cpu_callout_mask);
return -ENOMEM;
+ }
gdt = get_cpu_gdt_rw(cpu);
@@ -476,5 +462,8 @@ static const struct smp_ops xen_smp_ops __initconst = {
void __init xen_smp_init(void)
{
smp_ops = xen_smp_ops;
- xen_fill_possible_map();
+
+ /* Avoid searching for BIOS MP tables */
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.mpparse.get_smp_config = _get_smp_config;
}
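
Why _get_smp_config() ignores its early pass: the x86 boot code invokes the mpparse get_smp_config hook twice, once early and once late. For reference, the glue in asm/mpspec.h of this era looks approximately like this (quoted from memory; treat it as an approximation rather than the exact header):

/* The hook registered above is called with early=1 first, early=0 later. */
#define get_smp_config()	x86_init.mpparse.get_smp_config(0)
#define early_get_smp_config()	x86_init.mpparse.get_smp_config(1)

Enumerating the Xen vCPUs only on the late pass lets this single hook replace both xen_fill_possible_map() and xen_filter_cpu_maps().
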
diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c
index 9d548b0c772f..0c4f7554b7cc 100644
--- a/arch/x86/xen/suspend_hvm.c
+++ b/arch/x86/xen/suspend_hvm.c
@@ -5,6 +5,7 @@
#include <xen/hvm.h>
#include <xen/features.h>
#include <xen/interface/features.h>
+#include <xen/events.h>
#include "xen-ops.h"
@@ -14,6 +15,13 @@ void xen_hvm_post_suspend(int suspend_cancelled)
xen_hvm_init_shared_info();
xen_vcpu_restore();
}
- xen_setup_callback_vector();
+ if (xen_percpu_upcall) {
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ BUG_ON(xen_set_upcall_vector(cpu));
+ } else {
+ xen_setup_callback_vector();
+ }
xen_unplug_emulated_devices();
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index d9c945ee1100..9ef0a5cca96e 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -558,6 +558,11 @@ static void xen_hvm_setup_cpu_clockevents(void)
void __init xen_hvm_init_time_ops(void)
{
+ static bool hvm_time_initialized;
+
+ if (hvm_time_initialized)
+ return;
+
/*
* vector callback is needed otherwise we cannot receive interrupts
* on cpu > 0 and at this point we don't know how many cpus are
@@ -567,7 +572,22 @@ void __init xen_hvm_init_time_ops(void)
return;
if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
- pr_info("Xen doesn't support pvclock on HVM, disable pv timer");
+ pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
+ return;
+ }
+
+ /*
+ * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
+ * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest
+ * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec). Accessing
+ * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will then panic.
+ *
+ * The xen_hvm_init_time_ops() should be called again later after
+ * __this_cpu_read(xen_vcpu) is available.
+ */
+ if (!__this_cpu_read(xen_vcpu)) {
+ pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
+ xen_vcpu_nr(0));
return;
}
@@ -577,6 +597,8 @@ void __init xen_hvm_init_time_ops(void)
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
x86_platform.set_wallclock = xen_set_wallclock;
+
+ hvm_time_initialized = true;
}
#endif
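
The static hvm_time_initialized guard plus the early bail-out make xen_hvm_init_time_ops() safe to call more than once: first from xen_hvm_guest_init() (where it may have to back off because xen_vcpu is still NULL), and again from xen_hvm_smp_prepare_boot_cpu() earlier in this diff. A standalone C sketch of this delayed-init pattern (illustration only, not kernel code):

#include <stdbool.h>
#include <stdio.h>

static bool initialized;
static bool resource_ready;	/* stands in for __this_cpu_read(xen_vcpu) */

/* Safe to call repeatedly: a no-op once initialized, and it bails
 * (without latching the guard) while its prerequisite is missing. */
static void init_time_ops(void)
{
	if (initialized)
		return;
	if (!resource_ready) {
		puts("delayed: prerequisite not ready");
		return;
	}
	puts("initialized");
	initialized = true;
}

int main(void)
{
	init_time_ops();	/* early call: delayed */
	resource_ready = true;
	init_time_ops();	/* later call: succeeds */
	init_time_ops();	/* subsequent call: no-op */
	return 0;
}
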
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index e336f223f7f4..14ea32e734d5 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -57,16 +57,20 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+ if (size >= offsetof(struct dom0_vga_console_info,
+ u.vesa_lfb.ext_lfb_base)
+ + sizeof(info->u.vesa_lfb.ext_lfb_base)
+ && info->u.vesa_lfb.ext_lfb_base) {
+ screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base;
+ screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ }
+
if (info->video_type == XEN_VGATYPE_EFI_LFB) {
screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
break;
}
if (size >= offsetof(struct dom0_vga_console_info,
- u.vesa_lfb.gbl_caps)
- + sizeof(info->u.vesa_lfb.gbl_caps))
- screen_info->capabilities = info->u.vesa_lfb.gbl_caps;
- if (size >= offsetof(struct dom0_vga_console_info,
u.vesa_lfb.mode_attrs)
+ sizeof(info->u.vesa_lfb.mode_attrs))
screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 444d824775f6..6b4fdf6b9542 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -29,7 +29,7 @@
*/
SYM_FUNC_START(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
- ret
+ RET
SYM_FUNC_END(xen_irq_disable_direct)
/*
@@ -58,7 +58,7 @@ SYM_FUNC_START(check_events)
pop %rcx
pop %rax
FRAME_END
- ret
+ RET
SYM_FUNC_END(check_events)
/*
@@ -84,7 +84,7 @@ SYM_FUNC_START(xen_irq_enable_direct)
call check_events
1:
FRAME_END
- ret
+ RET
SYM_FUNC_END(xen_irq_enable_direct)
/*
@@ -100,7 +100,7 @@ SYM_FUNC_START(xen_save_fl_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah
addb %ah, %ah
- ret
+ RET
SYM_FUNC_END(xen_save_fl_direct)
SYM_FUNC_START(xen_read_cr2)
@@ -108,20 +108,21 @@ SYM_FUNC_START(xen_read_cr2)
_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
FRAME_END
- ret
+ RET
SYM_FUNC_END(xen_read_cr2);
SYM_FUNC_START(xen_read_cr2_direct)
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
FRAME_END
- ret
+ RET
SYM_FUNC_END(xen_read_cr2_direct);
.popsection
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
+ ENDBR
pop %rcx
pop %r11
jmp \name
@@ -147,6 +148,9 @@ xen_pv_trap asm_exc_page_fault
xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check
+#ifdef CONFIG_X86_KERNEL_IBT
+xen_pv_trap asm_exc_control_protection
+#endif
#ifdef CONFIG_X86_MCE
xen_pv_trap asm_xenpv_exc_machine_check
#endif /* CONFIG_X86_MCE */
@@ -162,6 +166,7 @@ SYM_CODE_START(xen_early_idt_handler_array)
i = 0
.rept NUM_EXCEPTION_VECTORS
UNWIND_HINT_EMPTY
+ ENDBR
pop %rcx
pop %r11
jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
@@ -189,6 +194,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
*/
SYM_CODE_START(xen_iret)
UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
pushq $0
jmp hypercall_iret
SYM_CODE_END(xen_iret)
@@ -228,8 +234,9 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
*/
/* Normal 64-bit system call target */
-SYM_CODE_START(xen_syscall_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_64)
+ UNWIND_HINT_ENTRY
+ ENDBR
popq %rcx
popq %r11
@@ -242,13 +249,14 @@ SYM_CODE_START(xen_syscall_target)
movq $__USER_CS, 1*8(%rsp)
jmp entry_SYSCALL_64_after_hwframe
-SYM_CODE_END(xen_syscall_target)
+SYM_CODE_END(xen_entry_SYSCALL_64)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
-SYM_CODE_START(xen_syscall32_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_compat)
+ UNWIND_HINT_ENTRY
+ ENDBR
popq %rcx
popq %r11
@@ -261,11 +269,12 @@ SYM_CODE_START(xen_syscall32_target)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSCALL_compat_after_hwframe
-SYM_CODE_END(xen_syscall32_target)
+SYM_CODE_END(xen_entry_SYSCALL_compat)
/* 32-bit compat sysenter target */
-SYM_CODE_START(xen_sysenter_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_ENTRY
+ ENDBR
/*
* NB: Xen is polite and clears TF from EFLAGS for us. This means
* that we don't need to guard against single step exceptions here.
@@ -282,18 +291,19 @@ SYM_CODE_START(xen_sysenter_target)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSENTER_compat_after_hwframe
-SYM_CODE_END(xen_sysenter_target)
+SYM_CODE_END(xen_entry_SYSENTER_compat)
#else /* !CONFIG_IA32_EMULATION */
-SYM_CODE_START(xen_syscall32_target)
-SYM_CODE_START(xen_sysenter_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_compat)
+SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_ENTRY
+ ENDBR
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $0
jmp hypercall_iret
-SYM_CODE_END(xen_sysenter_target)
-SYM_CODE_END(xen_syscall32_target)
+SYM_CODE_END(xen_entry_SYSENTER_compat)
+SYM_CODE_END(xen_entry_SYSCALL_compat)
#endif /* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 6a64496edefb..ffaa62167f6e 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -25,8 +25,13 @@
SYM_CODE_START(hypercall_page)
.rept (PAGE_SIZE / 32)
UNWIND_HINT_FUNC
- .skip 31, 0x90
+ ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
ret
+ /*
+ * Xen will write the hypercall page, and sort out ENDBR.
+ */
+ .skip 31, 0xcc
.endr
#define HYPERCALL(n) \
@@ -41,17 +46,9 @@ SYM_CODE_END(hypercall_page)
__INIT
SYM_CODE_START(startup_xen)
UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
cld
- /* Clear .bss */
- xor %eax,%eax
- mov $__bss_start, %rdi
- mov $__bss_stop, %rcx
- sub %rdi, %rcx
- shr $3, %rcx
- rep stosq
-
- mov %rsi, xen_start_info
mov initial_stack(%rip), %rsp
/* Set up %gs.
@@ -66,6 +63,7 @@ SYM_CODE_START(startup_xen)
cdq
wrmsr
+ mov %rsi, %rdi
call xen_start_kernel
SYM_CODE_END(startup_xen)
__FINIT
@@ -74,6 +72,7 @@ SYM_CODE_END(startup_xen)
.pushsection .text
SYM_CODE_START(asm_cpu_bringup_and_idle)
UNWIND_HINT_EMPTY
+ ENDBR
call cpu_bringup_and_idle
SYM_CODE_END(asm_cpu_bringup_and_idle)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index fd0fec6e92f4..9a8bb972193d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -10,10 +10,10 @@
/* These are code, but not functions. Defined in entry.S */
extern const char xen_failsafe_callback[];
-void xen_sysenter_target(void);
+void xen_entry_SYSENTER_compat(void);
#ifdef CONFIG_X86_64
-void xen_syscall_target(void);
-void xen_syscall32_target(void);
+void xen_entry_SYSCALL_64(void);
+void xen_entry_SYSCALL_compat(void);
#endif
extern void *xen_initial_gdt;