aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/xen/enlighten_pv.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/enlighten_pv.c')
-rw-r--r--arch/x86/xen/enlighten_pv.c165
1 files changed, 119 insertions, 46 deletions
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ace2eb054053..26bbaf4b7330 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -49,6 +49,7 @@
#include <xen/hvc-console.h>
#include <xen/acpi.h>
+#include <asm/cpuid/api.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
@@ -60,6 +61,7 @@
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/msr-index.h>
+#include <asm/msr.h>
#include <asm/traps.h>
#include <asm/setup.h>
#include <asm/desc.h>
@@ -72,6 +74,7 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#include <asm/irq_stack.h>
#ifdef CONFIG_X86_IOPL_IOPERM
#include <asm/io_bitmap.h>
#endif
@@ -85,10 +88,6 @@
#endif
#include "xen-ops.h"
-#include "mmu.h"
-#include "smp.h"
-#include "multicalls.h"
-#include "pmu.h"
#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
@@ -97,12 +96,49 @@ void *xen_initial_gdt;
static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);
+#ifndef CONFIG_PREEMPTION
+/*
+ * Some hypercalls issued by the toolstack can take many 10s of
+ * seconds. Allow tasks running hypercalls via the privcmd driver to
+ * be voluntarily preempted even if full kernel preemption is
+ * disabled.
+ *
+ * Such preemptible hypercalls are bracketed by
+ * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
+ * calls.
+ */
+DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
+EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
+
+/*
+ * In case of scheduling the flag must be cleared and restored after
+ * returning from schedule as the task might move to a different CPU.
+ */
+static __always_inline bool get_and_clear_inhcall(void)
+{
+ bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
+
+ __this_cpu_write(xen_in_preemptible_hcall, false);
+ return inhcall;
+}
+
+static __always_inline void restore_inhcall(bool inhcall)
+{
+ __this_cpu_write(xen_in_preemptible_hcall, inhcall);
+}
+
+#else
+
+static __always_inline bool get_and_clear_inhcall(void) { return false; }
+static __always_inline void restore_inhcall(bool inhcall) { }
+
+#endif
+
struct tls_descs {
struct desc_struct desc[3];
};
DEFINE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode) = XEN_LAZY_NONE;
-DEFINE_PER_CPU(unsigned int, xen_lazy_nesting);
enum xen_lazy_mode xen_get_lazy_mode(void)
{
@@ -175,7 +211,7 @@ static void __init xen_set_mtrr_data(void)
/* Only overwrite MTRR state if any MTRR could be got from Xen. */
if (reg)
- mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE);
+ guest_force_mtrr_state(var, reg, MTRR_TYPE_UNCACHABLE);
#endif
}
@@ -199,7 +235,7 @@ static void __init xen_pv_init_platform(void)
if (xen_initial_domain())
xen_set_mtrr_data();
else
- mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+ guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
/* Adjust nr_cpu_ids before "enumeration" happens */
xen_smp_count_cpus();
@@ -219,14 +255,22 @@ static __read_mostly unsigned int cpuid_leaf5_edx_val;
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
- unsigned maskebx = ~0;
+ unsigned int maskebx = ~0;
+ unsigned int or_ebx = 0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
switch (*ax) {
- case CPUID_MWAIT_LEAF:
+ case 0x1:
+ /* Replace initial APIC ID in bits 24-31 of EBX. */
+ /* See xen_pv_smp_config() for related topology preparations. */
+ maskebx = 0x00ffffff;
+ or_ebx = smp_processor_id() << 24;
+ break;
+
+ case CPUID_LEAF_MWAIT:
/* Synthesize the values.. */
*ax = 0;
*bx = 0;
@@ -248,6 +292,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
: "0" (*ax), "2" (*cx));
*bx &= maskebx;
+ *bx |= or_ebx;
}
static bool __init xen_check_mwait(void)
@@ -295,7 +340,7 @@ static bool __init xen_check_mwait(void)
* ecx and edx. The hypercall provides only partial information.
*/
- ax = CPUID_MWAIT_LEAF;
+ ax = CPUID_LEAF_MWAIT;
bx = 0;
cx = 0;
dx = 0;
@@ -681,6 +726,36 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check)
}
#endif
+static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ xen_evtchn_do_upcall();
+
+ set_irq_regs(old_regs);
+}
+
+__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
+{
+ irqentry_state_t state = irqentry_enter(regs);
+ bool inhcall;
+
+ instrumentation_begin();
+ run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
+
+ inhcall = get_and_clear_inhcall();
+ if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
+ irqentry_exit_cond_resched();
+ instrumentation_end();
+ restore_inhcall(inhcall);
+ } else {
+ instrumentation_end();
+ irqentry_exit(regs, state);
+ }
+}
+
struct trap_array_entry {
void (*orig)(void);
void (*xen)(void);
@@ -1012,37 +1087,33 @@ static void xen_write_cr4(unsigned long cr4)
native_write_cr4(cr4);
}
-static u64 xen_do_read_msr(unsigned int msr, int *err)
+static u64 xen_do_read_msr(u32 msr, int *err)
{
u64 val = 0; /* Avoid uninitialized value for safe variant. */
- if (pmu_msr_read(msr, &val, err))
+ if (pmu_msr_chk_emulated(msr, &val, true))
return val;
if (err)
- val = native_read_msr_safe(msr, err);
+ *err = native_read_msr_safe(msr, &val);
else
val = native_read_msr(msr);
switch (msr) {
case MSR_IA32_APICBASE:
val &= ~X2APIC_ENABLE;
+ if (smp_processor_id() == 0)
+ val |= MSR_IA32_APICBASE_BSP;
+ else
+ val &= ~MSR_IA32_APICBASE_BSP;
break;
}
return val;
}
-static void set_seg(unsigned int which, unsigned int low, unsigned int high,
- int *err)
+static void set_seg(u32 which, u64 base)
{
- u64 base = ((u64)high << 32) | low;
-
- if (HYPERVISOR_set_segment_base(which, base) == 0)
- return;
-
- if (err)
- *err = -EIO;
- else
+ if (HYPERVISOR_set_segment_base(which, base))
WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
}
@@ -1051,20 +1122,19 @@ static void set_seg(unsigned int which, unsigned int low, unsigned int high,
* With err == NULL write_msr() semantics are selected.
* Supplying an err pointer requires err to be pre-initialized with 0.
*/
-static void xen_do_write_msr(unsigned int msr, unsigned int low,
- unsigned int high, int *err)
+static void xen_do_write_msr(u32 msr, u64 val, int *err)
{
switch (msr) {
case MSR_FS_BASE:
- set_seg(SEGBASE_FS, low, high, err);
+ set_seg(SEGBASE_FS, val);
break;
case MSR_KERNEL_GS_BASE:
- set_seg(SEGBASE_GS_USER, low, high, err);
+ set_seg(SEGBASE_GS_USER, val);
break;
case MSR_GS_BASE:
- set_seg(SEGBASE_GS_KERNEL, low, high, err);
+ set_seg(SEGBASE_GS_KERNEL, val);
break;
case MSR_STAR:
@@ -1080,42 +1150,45 @@ static void xen_do_write_msr(unsigned int msr, unsigned int low,
break;
default:
- if (!pmu_msr_write(msr, low, high, err)) {
- if (err)
- *err = native_write_msr_safe(msr, low, high);
- else
- native_write_msr(msr, low, high);
- }
+ if (pmu_msr_chk_emulated(msr, &val, false))
+ return;
+
+ if (err)
+ *err = native_write_msr_safe(msr, val);
+ else
+ native_write_msr(msr, val);
}
}
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
+static int xen_read_msr_safe(u32 msr, u64 *val)
{
- return xen_do_read_msr(msr, err);
+ int err = 0;
+
+ *val = xen_do_read_msr(msr, &err);
+ return err;
}
-static int xen_write_msr_safe(unsigned int msr, unsigned int low,
- unsigned int high)
+static int xen_write_msr_safe(u32 msr, u64 val)
{
int err = 0;
- xen_do_write_msr(msr, low, high, &err);
+ xen_do_write_msr(msr, val, &err);
return err;
}
-static u64 xen_read_msr(unsigned int msr)
+static u64 xen_read_msr(u32 msr)
{
- int err;
+ int err = 0;
return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
}
-static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
+static void xen_write_msr(u32 msr, u64 val)
{
int err;
- xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
+ xen_do_write_msr(msr, val, xen_msr_safe ? &err : NULL);
}
/* This is called once we have the cpu_possible_mask */
@@ -1152,8 +1225,6 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = {
.write_cr4 = xen_write_cr4,
- .wbinvd = pv_native_wbinvd,
-
.read_msr = xen_read_msr,
.write_msr = xen_write_msr,
@@ -1332,6 +1403,9 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
xen_domain_type = XEN_PV_DOMAIN;
xen_start_flags = xen_start_info->flags;
+ /* Interrupts are guaranteed to be off initially. */
+ early_boot_irqs_disabled = true;
+ static_call_update_early(xen_hypercall, xen_hypercall_pv);
xen_setup_features();
@@ -1422,7 +1496,6 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
local_irq_disable();
- early_boot_irqs_disabled = true;
xen_raw_console_write("mapping kernel into physical memory\n");
xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,