aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen/enlighten_pv.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/enlighten_pv.c')
-rw-r--r--arch/x86/xen/enlighten_pv.c531
1 files changed, 267 insertions, 264 deletions
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 507f4fb88fa7..f82857e48815 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -27,12 +27,11 @@
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
-#include <linux/highmem.h>
-#include <linux/console.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
-#include <linux/frame.h>
+#include <linux/reboot.h>
+#include <linux/virtio_anchor.h>
#include <xen/xen.h>
#include <xen/events.h>
@@ -63,7 +62,6 @@
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
#include <asm/stackprotector.h>
@@ -110,28 +108,22 @@ struct tls_descs {
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
-static void __init xen_banner(void)
+static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE);
+
+static int __init parse_xen_msr_safe(char *str)
{
- unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
- struct xen_extraversion extra;
- HYPERVISOR_xen_version(XENVER_extraversion, &extra);
-
- pr_info("Booting paravirtualized kernel on %s\n", pv_info.name);
- printk(KERN_INFO "Xen version: %d.%d%s%s\n",
- version >> 16, version & 0xffff, extra.extraversion,
- xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
-
-#ifdef CONFIG_X86_32
- pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
- "Support for running as 32-bit PV-guest under Xen will soon be removed\n"
- "from the Linux kernel!\n"
- "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
- "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
-#endif
+ if (str)
+ return strtobool(str, &xen_msr_safe);
+ return -EINVAL;
}
+early_param("xen_msr_safe", parse_xen_msr_safe);
static void __init xen_pv_init_platform(void)
{
+ /* PV guests can't operate virtio devices without grants. */
+ if (IS_ENABLED(CONFIG_XEN_VIRTIO))
+ virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc);
+
populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
@@ -152,22 +144,6 @@ static void __init xen_pv_guest_late_init(void)
#endif
}
-/* Check if running on Xen version (major, minor) or later */
-bool
-xen_running_on_version_or_later(unsigned int major, unsigned int minor)
-{
- unsigned int version;
-
- if (!xen_domain())
- return false;
-
- version = HYPERVISOR_xen_version(XENVER_version, NULL);
- if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) ||
- ((version >> 16) > major))
- return true;
- return false;
-}
-
static __read_mostly unsigned int cpuid_leaf5_ecx_val;
static __read_mostly unsigned int cpuid_leaf5_edx_val;
@@ -204,7 +180,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
*bx &= maskebx;
}
-STACK_FRAME_NON_STANDARD(xen_cpuid); /* XEN_EMULATE_PREFIX */
static bool __init xen_check_mwait(void)
{
@@ -321,12 +296,12 @@ static void __init xen_init_capabilities(void)
}
}
-static void xen_set_debugreg(int reg, unsigned long val)
+static noinstr void xen_set_debugreg(int reg, unsigned long val)
{
HYPERVISOR_set_debugreg(reg, val);
}
-static unsigned long xen_get_debugreg(int reg)
+static noinstr unsigned long xen_get_debugreg(int reg)
{
return HYPERVISOR_get_debugreg(reg);
}
@@ -354,15 +329,13 @@ static void set_aliased_prot(void *v, pgprot_t prot)
pte_t *ptep;
pte_t pte;
unsigned long pfn;
- struct page *page;
unsigned char dummy;
+ void *va;
ptep = lookup_address((unsigned long)v, &level);
BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep);
- page = pfn_to_page(pfn);
-
pte = pfn_pte(pfn, prot);
/*
@@ -387,19 +360,15 @@ static void set_aliased_prot(void *v, pgprot_t prot)
preempt_disable();
- probe_kernel_read(&dummy, v, 1);
+ copy_from_kernel_nofault(&dummy, v, 1);
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
- if (!PageHighMem(page)) {
- void *av = __va(PFN_PHYS(pfn));
+ va = __va(PFN_PHYS(pfn));
- if (av != v)
- if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
- BUG();
- } else
- kmap_flush_unused();
+ if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+ BUG();
preempt_enable();
}
@@ -539,30 +508,12 @@ static void load_TLS_descriptor(struct thread_struct *t,
static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
/*
- * XXX sleazy hack: If we're being called in a lazy-cpu zone
- * and lazy gs handling is enabled, it means we're in a
- * context switch, and %gs has just been saved. This means we
- * can zero it out to prevent faults on exit from the
- * hypervisor if the next process has no %gs. Either way, it
- * has been saved, and the new value will get loaded properly.
- * This will go away as soon as Xen has been modified to not
- * save/restore %gs for normal hypercalls.
- *
- * On x86_64, this hack is not used for %gs, because gs points
- * to KERNEL_GS_BASE (and uses it for PDA references), so we
- * must not zero %gs on x86_64
- *
- * For x86_64, we need to zero %fs, otherwise we may get an
+ * In lazy mode we need to zero %fs, otherwise we may get an
* exception between the new %fs descriptor being loaded and
* %fs being effectively cleared at __switch_to().
*/
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-#ifdef CONFIG_X86_32
- lazy_load_gs(0);
-#else
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
loadsegment(fs, 0);
-#endif
- }
xen_mc_batch();
@@ -573,13 +524,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
-#ifdef CONFIG_X86_64
static void xen_load_gs_index(unsigned int idx)
{
if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
BUG();
}
-#endif
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
const void *ptr)
@@ -598,51 +547,113 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
preempt_enable();
}
-#ifdef CONFIG_X86_64
+void noist_exc_debug(struct pt_regs *regs);
+
+DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
+{
+ /* On Xen PV, NMI doesn't use IST. The C part is the same as native. */
+ exc_nmi(regs);
+}
+
+DEFINE_IDTENTRY_RAW_ERRORCODE(xenpv_exc_double_fault)
+{
+ /* On Xen PV, DF doesn't use IST. The C part is the same as native. */
+ exc_double_fault(regs, error_code);
+}
+
+DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
+{
+ /*
+ * There's no IST on Xen PV, but we still need to dispatch
+ * to the correct handler.
+ */
+ if (user_mode(regs))
+ noist_exc_debug(regs);
+ else
+ exc_debug(regs);
+}
+
+DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
+{
+ /* This should never happen and there is no way to handle it. */
+ instrumentation_begin();
+ pr_err("Unknown trap in Xen PV mode.");
+ BUG();
+ instrumentation_end();
+}
+
+#ifdef CONFIG_X86_MCE
+DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check)
+{
+ /*
+ * There's no IST on Xen PV, but we still need to dispatch
+ * to the correct handler.
+ */
+ if (user_mode(regs))
+ noist_exc_machine_check(regs);
+ else
+ exc_machine_check(regs);
+}
+#endif
+
struct trap_array_entry {
void (*orig)(void);
void (*xen)(void);
bool ist_okay;
};
+#define TRAP_ENTRY(func, ist_ok) { \
+ .orig = asm_##func, \
+ .xen = xen_asm_##func, \
+ .ist_okay = ist_ok }
+
+#define TRAP_ENTRY_REDIR(func, ist_ok) { \
+ .orig = asm_##func, \
+ .xen = xen_asm_xenpv_##func, \
+ .ist_okay = ist_ok }
+
static struct trap_array_entry trap_array[] = {
- { debug, xen_xendebug, true },
- { double_fault, xen_double_fault, true },
+ TRAP_ENTRY_REDIR(exc_debug, true ),
+ TRAP_ENTRY_REDIR(exc_double_fault, true ),
#ifdef CONFIG_X86_MCE
- { machine_check, xen_machine_check, true },
+ TRAP_ENTRY_REDIR(exc_machine_check, true ),
#endif
- { nmi, xen_xennmi, true },
- { int3, xen_int3, false },
- { overflow, xen_overflow, false },
+ TRAP_ENTRY_REDIR(exc_nmi, true ),
+ TRAP_ENTRY(exc_int3, false ),
+ TRAP_ENTRY(exc_overflow, false ),
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
#endif
- { page_fault, xen_page_fault, false },
- { divide_error, xen_divide_error, false },
- { bounds, xen_bounds, false },
- { invalid_op, xen_invalid_op, false },
- { device_not_available, xen_device_not_available, false },
- { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
- { invalid_TSS, xen_invalid_TSS, false },
- { segment_not_present, xen_segment_not_present, false },
- { stack_segment, xen_stack_segment, false },
- { general_protection, xen_general_protection, false },
- { spurious_interrupt_bug, xen_spurious_interrupt_bug, false },
- { coprocessor_error, xen_coprocessor_error, false },
- { alignment_check, xen_alignment_check, false },
- { simd_coprocessor_error, xen_simd_coprocessor_error, false },
+ TRAP_ENTRY(exc_page_fault, false ),
+ TRAP_ENTRY(exc_divide_error, false ),
+ TRAP_ENTRY(exc_bounds, false ),
+ TRAP_ENTRY(exc_invalid_op, false ),
+ TRAP_ENTRY(exc_device_not_available, false ),
+ TRAP_ENTRY(exc_coproc_segment_overrun, false ),
+ TRAP_ENTRY(exc_invalid_tss, false ),
+ TRAP_ENTRY(exc_segment_not_present, false ),
+ TRAP_ENTRY(exc_stack_segment, false ),
+ TRAP_ENTRY(exc_general_protection, false ),
+ TRAP_ENTRY(exc_spurious_interrupt_bug, false ),
+ TRAP_ENTRY(exc_coprocessor_error, false ),
+ TRAP_ENTRY(exc_alignment_check, false ),
+ TRAP_ENTRY(exc_simd_coprocessor_error, false ),
+#ifdef CONFIG_X86_KERNEL_IBT
+ TRAP_ENTRY(exc_control_protection, false ),
+#endif
};
static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
unsigned int nr;
bool ist_okay = false;
+ bool found = false;
/*
* Replace trap handler addresses by Xen specific ones.
* Check for known traps using IST and whitelist them.
* The debugger ones are the only ones we care about.
- * Xen will handle faults like double_fault, * so we should never see
+ * Xen will handle faults like double_fault, so we should never see
* them. Warn if there's an unexpected IST-using fault handler.
*/
for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
@@ -651,6 +662,7 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
if (*addr == entry->orig) {
*addr = entry->xen;
ist_okay = entry->ist_okay;
+ found = true;
break;
}
}
@@ -661,14 +673,17 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
nr = (*addr - (void *)early_idt_handler_array[0]) /
EARLY_IDT_HANDLER_SIZE;
*addr = (void *)xen_early_idt_handler_array[nr];
+ found = true;
}
- if (WARN_ON(ist != 0 && !ist_okay))
+ if (!found)
+ *addr = (void *)xen_asm_exc_xen_unknown_trap;
+
+ if (WARN_ON(found && ist != 0 && !ist_okay))
return false;
return true;
}
-#endif
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
@@ -681,10 +696,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
info->vector = vector;
addr = gate_offset(val);
-#ifdef CONFIG_X86_64
if (!get_trap_addr((void **)&addr, val->bits.ist))
return 0;
-#endif /* CONFIG_X86_64 */
info->address = addr;
info->cs = gate_segment(val);
@@ -730,8 +743,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
preempt_enable();
}
-static void xen_convert_trap_info(const struct desc_ptr *desc,
- struct trap_info *traps)
+static unsigned xen_convert_trap_info(const struct desc_ptr *desc,
+ struct trap_info *traps, bool full)
{
unsigned in, out, count;
@@ -741,17 +754,18 @@ static void xen_convert_trap_info(const struct desc_ptr *desc,
for (in = out = 0; in < count; in++) {
gate_desc *entry = (gate_desc *)(desc->address) + in;
- if (cvt_gate_to_trap(in, entry, &traps[out]))
+ if (cvt_gate_to_trap(in, entry, &traps[out]) || full)
out++;
}
- traps[out].address = 0;
+
+ return out;
}
void xen_copy_trap_info(struct trap_info *traps)
{
const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);
- xen_convert_trap_info(desc, traps);
+ xen_convert_trap_info(desc, traps, true);
}
/* Load a new IDT into Xen. In principle this can be per-CPU, so we
@@ -761,6 +775,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
+ static const struct trap_info zero = { };
+ unsigned out;
trace_xen_cpu_load_idt(desc);
@@ -768,7 +784,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
- xen_convert_trap_info(desc, traps);
+ out = xen_convert_trap_info(desc, traps, false);
+ traps[out] = zero;
xen_mc_flush();
if (HYPERVISOR_set_trap_table(traps))
@@ -841,6 +858,17 @@ static void xen_load_sp0(unsigned long sp0)
}
#ifdef CONFIG_X86_IOPL_IOPERM
+static void xen_invalidate_io_bitmap(void)
+{
+ struct physdev_set_iobitmap iobitmap = {
+ .bitmap = NULL,
+ .nr_ports = 0,
+ };
+
+ native_tss_invalidate_io_bitmap();
+ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
+}
+
static void xen_update_io_bitmap(void)
{
struct physdev_set_iobitmap iobitmap;
@@ -899,14 +927,18 @@ static void xen_write_cr4(unsigned long cr4)
native_write_cr4(cr4);
}
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
+static u64 xen_do_read_msr(unsigned int msr, int *err)
{
- u64 val;
+ u64 val = 0; /* Avoid uninitialized value for safe variant. */
if (pmu_msr_read(msr, &val, err))
return val;
- val = native_read_msr_safe(msr, err);
+ if (err)
+ val = native_read_msr_safe(msr, err);
+ else
+ val = native_read_msr(msr);
+
switch (msr) {
case MSR_IA32_APICBASE:
val &= ~X2APIC_ENABLE;
@@ -915,28 +947,40 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
return val;
}
-static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+static void set_seg(unsigned int which, unsigned int low, unsigned int high,
+ int *err)
{
- int ret;
-#ifdef CONFIG_X86_64
- unsigned int which;
- u64 base;
-#endif
+ u64 base = ((u64)high << 32) | low;
+
+ if (HYPERVISOR_set_segment_base(which, base) == 0)
+ return;
- ret = 0;
+ if (err)
+ *err = -EIO;
+ else
+ WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
+}
+/*
+ * Support write_msr_safe() and write_msr() semantics.
+ * With err == NULL write_msr() semantics are selected.
+ * Supplying an err pointer requires err to be pre-initialized with 0.
+ */
+static void xen_do_write_msr(unsigned int msr, unsigned int low,
+ unsigned int high, int *err)
+{
switch (msr) {
-#ifdef CONFIG_X86_64
- case MSR_FS_BASE: which = SEGBASE_FS; goto set;
- case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
- case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
-
- set:
- base = ((u64)high << 32) | low;
- if (HYPERVISOR_set_segment_base(which, base) != 0)
- ret = -EIO;
+ case MSR_FS_BASE:
+ set_seg(SEGBASE_FS, low, high, err);
+ break;
+
+ case MSR_KERNEL_GS_BASE:
+ set_seg(SEGBASE_GS_USER, low, high, err);
+ break;
+
+ case MSR_GS_BASE:
+ set_seg(SEGBASE_GS_KERNEL, low, high, err);
break;
-#endif
case MSR_STAR:
case MSR_CSTAR:
@@ -951,31 +995,42 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
break;
default:
- if (!pmu_msr_write(msr, low, high, &ret))
- ret = native_write_msr_safe(msr, low, high);
+ if (!pmu_msr_write(msr, low, high, err)) {
+ if (err)
+ *err = native_write_msr_safe(msr, low, high);
+ else
+ native_write_msr(msr, low, high);
+ }
}
+}
- return ret;
+static u64 xen_read_msr_safe(unsigned int msr, int *err)
+{
+ return xen_do_read_msr(msr, err);
+}
+
+static int xen_write_msr_safe(unsigned int msr, unsigned int low,
+ unsigned int high)
+{
+ int err = 0;
+
+ xen_do_write_msr(msr, low, high, &err);
+
+ return err;
}
static u64 xen_read_msr(unsigned int msr)
{
- /*
- * This will silently swallow a #GP from RDMSR. It may be worth
- * changing that.
- */
int err;
- return xen_read_msr_safe(msr, &err);
+ return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
}
static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
{
- /*
- * This will silently swallow a #GP from WRMSR. It may be worth
- * changing that.
- */
- xen_write_msr_safe(msr, low, high);
+ int err;
+
+ xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
}
/* This is called once we have the cpu_possible_mask */
@@ -986,99 +1041,68 @@ void __init xen_setup_vcpu_info_placement(void)
for_each_possible_cpu(cpu) {
/* Set up direct vCPU id mapping for PV guests. */
per_cpu(xen_vcpu_id, cpu) = cpu;
-
- /*
- * xen_vcpu_setup(cpu) can fail -- in which case it
- * falls back to the shared_info version for cpus
- * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS.
- *
- * xen_cpu_up_prepare_pv() handles the rest by failing
- * them in hotplug.
- */
- (void) xen_vcpu_setup(cpu);
+ xen_vcpu_setup(cpu);
}
- /*
- * xen_vcpu_setup managed to place the vcpu_info within the
- * percpu area for all cpus, so make use of it.
- */
- if (xen_have_vcpu_info_placement) {
- pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
- pv_ops.irq.restore_fl =
- __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
- pv_ops.irq.irq_disable =
- __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
- pv_ops.irq.irq_enable =
- __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
- pv_ops.mmu.read_cr2 =
- __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
- }
+ pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+ pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+ pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
+ pv_ops.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}
static const struct pv_info xen_info __initconst = {
- .shared_kernel_pmd = 0,
-
-#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
-#endif
.name = "Xen",
};
-static const struct pv_cpu_ops xen_cpu_ops __initconst = {
- .cpuid = xen_cpuid,
-
- .set_debugreg = xen_set_debugreg,
- .get_debugreg = xen_get_debugreg,
+static const typeof(pv_ops) xen_cpu_ops __initconst = {
+ .cpu = {
+ .cpuid = xen_cpuid,
- .read_cr0 = xen_read_cr0,
- .write_cr0 = xen_write_cr0,
+ .set_debugreg = xen_set_debugreg,
+ .get_debugreg = xen_get_debugreg,
- .write_cr4 = xen_write_cr4,
+ .read_cr0 = xen_read_cr0,
+ .write_cr0 = xen_write_cr0,
- .wbinvd = native_wbinvd,
+ .write_cr4 = xen_write_cr4,
- .read_msr = xen_read_msr,
- .write_msr = xen_write_msr,
+ .wbinvd = native_wbinvd,
- .read_msr_safe = xen_read_msr_safe,
- .write_msr_safe = xen_write_msr_safe,
+ .read_msr = xen_read_msr,
+ .write_msr = xen_write_msr,
- .read_pmc = xen_read_pmc,
+ .read_msr_safe = xen_read_msr_safe,
+ .write_msr_safe = xen_write_msr_safe,
- .iret = xen_iret,
-#ifdef CONFIG_X86_64
- .usergs_sysret64 = xen_sysret64,
-#endif
+ .read_pmc = xen_read_pmc,
- .load_tr_desc = paravirt_nop,
- .set_ldt = xen_set_ldt,
- .load_gdt = xen_load_gdt,
- .load_idt = xen_load_idt,
- .load_tls = xen_load_tls,
-#ifdef CONFIG_X86_64
- .load_gs_index = xen_load_gs_index,
-#endif
+ .load_tr_desc = paravirt_nop,
+ .set_ldt = xen_set_ldt,
+ .load_gdt = xen_load_gdt,
+ .load_idt = xen_load_idt,
+ .load_tls = xen_load_tls,
+ .load_gs_index = xen_load_gs_index,
- .alloc_ldt = xen_alloc_ldt,
- .free_ldt = xen_free_ldt,
+ .alloc_ldt = xen_alloc_ldt,
+ .free_ldt = xen_free_ldt,
- .store_tr = xen_store_tr,
+ .store_tr = xen_store_tr,
- .write_ldt_entry = xen_write_ldt_entry,
- .write_gdt_entry = xen_write_gdt_entry,
- .write_idt_entry = xen_write_idt_entry,
- .load_sp0 = xen_load_sp0,
+ .write_ldt_entry = xen_write_ldt_entry,
+ .write_gdt_entry = xen_write_gdt_entry,
+ .write_idt_entry = xen_write_idt_entry,
+ .load_sp0 = xen_load_sp0,
#ifdef CONFIG_X86_IOPL_IOPERM
- .update_io_bitmap = xen_update_io_bitmap,
+ .invalidate_io_bitmap = xen_invalidate_io_bitmap,
+ .update_io_bitmap = xen_update_io_bitmap,
#endif
- .io_delay = xen_io_delay,
+ .io_delay = xen_io_delay,
- /* Xen takes care of %gs when switching to usermode for us */
- .swapgs = paravirt_nop,
-
- .start_context_switch = paravirt_start_context_switch,
- .end_context_switch = xen_end_context_switch,
+ .start_context_switch = paravirt_start_context_switch,
+ .end_context_switch = xen_end_context_switch,
+ },
};
static void xen_restart(char *msg)
@@ -1093,8 +1117,7 @@ static void xen_machine_halt(void)
static void xen_machine_power_off(void)
{
- if (pm_power_off)
- pm_power_off();
+ do_kernel_power_off();
xen_reboot(SHUTDOWN_poweroff);
}
@@ -1186,7 +1209,6 @@ static void __init xen_setup_gdt(int cpu)
pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
pv_ops.cpu.load_gdt = xen_load_gdt_boot;
- setup_stack_canary_segment(cpu);
switch_to_new_gdt(cpu);
pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
@@ -1198,16 +1220,31 @@ static void __init xen_dom0_set_legacy_features(void)
x86_platform.legacy.rtc = 1;
}
+static void __init xen_domu_set_legacy_features(void)
+{
+ x86_platform.legacy.rtc = 0;
+}
+
+extern void early_xen_iret_patch(void);
+
/* First C function to be called on Xen boot */
-asmlinkage __visible void __init xen_start_kernel(void)
+asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
{
struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
int rc;
- if (!xen_start_info)
+ if (!si)
return;
+ clear_bss();
+
+ xen_start_info = si;
+
+ __text_gen_insn(&early_xen_iret_patch,
+ JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret,
+ JMP32_INSN_SIZE);
+
xen_domain_type = XEN_PV_DOMAIN;
xen_start_flags = xen_start_info->flags;
@@ -1215,8 +1252,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_ops.init.patch = paravirt_patch_default;
- pv_ops.cpu = xen_cpu_ops;
+ pv_ops.cpu = xen_cpu_ops.cpu;
xen_init_irq_ops();
/*
@@ -1249,25 +1285,19 @@ asmlinkage __visible void __init xen_start_kernel(void)
__supported_pte_mask &= ~_PAGE_GLOBAL;
__default_kernel_pte_mask &= ~_PAGE_GLOBAL;
- /*
- * Prevent page tables from being allocated in highmem, even
- * if CONFIG_HIGHPTE is enabled.
- */
- __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
-
/* Get mfn list */
xen_build_dynamic_phys_to_machine();
+ /* Work out if we support NX */
+ get_cpu_cap(&boot_cpu_data);
+ x86_configure_nx();
+
/*
* Set up kernel GDT and segment registers, mainly so that
* -fstack-protector code can be executed.
*/
xen_setup_gdt(0);
- /* Work out if we support NX */
- get_cpu_cap(&boot_cpu_data);
- x86_configure_nx();
-
/* Determine virtual and physical address sizes */
get_cpu_address_sizes(&boot_cpu_data);
@@ -1285,13 +1315,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_init_apic();
#endif
- if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
- pv_ops.mmu.ptep_modify_prot_start =
- xen_ptep_modify_prot_start;
- pv_ops.mmu.ptep_modify_prot_commit =
- xen_ptep_modify_prot_commit;
- }
-
machine_ops = xen_machine_ops;
/*
@@ -1309,7 +1332,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
* any NUMA information the kernel tries to get from ACPI will
* be meaningless. Prevent it from trying.
*/
- acpi_numa = -1;
+ disable_srat();
#endif
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
@@ -1321,18 +1344,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_start_info->nr_pages);
xen_reserve_special_pages();
- /* keep using Xen gdt for now; no urgent need to change it */
-
-#ifdef CONFIG_X86_32
- pv_info.kernel_rpl = 1;
- if (xen_feature(XENFEAT_supervisor_mode_kernel))
- pv_info.kernel_rpl = 0;
-#else
- pv_info.kernel_rpl = 0;
-#endif
- /* set the limit of our address space */
- xen_reserve_top();
-
/*
* We used to do this in xen_arch_setup, but that is too late
* on AMD were early_cpu_init (run before ->arch_setup()) calls
@@ -1343,12 +1354,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
if (rc != 0)
xen_raw_printk("physdev_op failed %d\n", rc);
-#ifdef CONFIG_X86_32
- /* set up basic CPUID stuff */
- cpu_detect(&new_cpu_data);
- set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
- new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
-#endif
if (xen_start_info->mod_start) {
if (xen_start_info->flags & SIF_MOD_START_PFN)
@@ -1365,9 +1370,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN;
if (!xen_initial_domain()) {
- add_preferred_console("xenboot", 0, NULL);
if (pci_xen)
x86_init.pci.arch_init = pci_xen_init;
+ x86_platform.set_legacy_features =
+ xen_domu_set_legacy_features;
} else {
const struct dom0_vga_console_info *info =
(void *)((char *)xen_start_info +
@@ -1392,18 +1398,19 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_acpi_sleep_register();
- /* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
-
xen_boot_params_init_edd();
+
+#ifdef CONFIG_ACPI
+ /*
+ * Disable selecting "Firmware First mode" for correctable
+ * memory errors, as this is the duty of the hypervisor to
+ * decide.
+ */
+ acpi_disable_cmcff = 1;
+#endif
}
- if (!boot_params.screen_info.orig_video_isVGA)
- add_preferred_console("tty", 0, NULL);
- add_preferred_console("hvc", 0, NULL);
- if (boot_params.screen_info.orig_video_isVGA)
- add_preferred_console("tty", 0, NULL);
+ xen_add_preferred_consoles();
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
@@ -1417,12 +1424,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_efi_init(&boot_params);
/* Start the world */
-#ifdef CONFIG_X86_32
- i386_start_kernel();
-#else
cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
-#endif
}
static int xen_cpu_up_prepare_pv(unsigned int cpu)