aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/efi.c2
-rw-r--r--arch/x86/xen/enlighten.c23
-rw-r--r--arch/x86/xen/mmu.c75
-rw-r--r--arch/x86/xen/multicalls.c8
-rw-r--r--arch/x86/xen/p2m.c23
-rw-r--r--arch/x86/xen/p2m.h15
-rw-r--r--arch/x86/xen/setup.c370
-rw-r--r--arch/x86/xen/smp.c31
-rw-r--r--arch/x86/xen/smp.h8
-rw-r--r--arch/x86/xen/spinlock.c2
-rw-r--r--arch/x86/xen/time.c10
-rw-r--r--arch/x86/xen/xen-head.S36
12 files changed, 423 insertions, 180 deletions
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a02e09e18f57..be14cc3e48d5 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -15,12 +15,14 @@
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bitops.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/string.h>
#include <xen/xen-ops.h>
+#include <asm/page.h>
#include <asm/setup.h>
void __init xen_efi_init(void)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0cb11fb5008..1a3f0445432a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -821,7 +821,7 @@ static void xen_convert_trap_info(const struct desc_ptr *desc,
void xen_copy_trap_info(struct trap_info *traps)
{
- const struct desc_ptr *desc = &__get_cpu_var(idt_desc);
+ const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);
xen_convert_trap_info(desc, traps);
}
@@ -838,7 +838,7 @@ static void xen_load_idt(const struct desc_ptr *desc)
spin_lock(&lock);
- __get_cpu_var(idt_desc) = *desc;
+ memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
xen_convert_trap_info(desc, traps);
@@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu)
pv_cpu_ops.load_gdt = xen_load_gdt;
}
+#ifdef CONFIG_XEN_PVH
/*
* A PV guest starts with default flags that are not set for PVH, set them
* here asap.
@@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void)
return;
xen_have_vector_callback = 1;
+
+ xen_pvh_early_cpu_init(0, false);
xen_pvh_set_cr_flags(0);
#ifdef CONFIG_X86_32
BUG(); /* PVH: Implement proper support. */
#endif
}
+#endif /* CONFIG_XEN_PVH */
/* First C function to be called on Xen boot */
asmlinkage __visible void __init xen_start_kernel(void)
{
struct physdev_set_iopl set_iopl;
+ unsigned long initrd_start = 0;
int rc;
if (!xen_start_info)
@@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
xen_setup_features();
+#ifdef CONFIG_XEN_PVH
xen_pvh_early_guest_init();
+#endif
xen_setup_machphys_mapping();
/* Install Xen paravirt ops */
@@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
- __supported_pte_mask |= _PAGE_IOMAP;
-
/*
* Prevent page tables from being allocated in highmem, even
* if CONFIG_HIGHPTE is enabled.
@@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
new_cpu_data.x86_capability[0] = cpuid_edx(1);
#endif
+ if (xen_start_info->mod_start) {
+ if (xen_start_info->flags & SIF_MOD_START_PFN)
+ initrd_start = PFN_PHYS(xen_start_info->mod_start);
+ else
+ initrd_start = __pa(xen_start_info->mod_start);
+ }
+
/* Poke various useful things into boot_params */
boot_params.hdr.type_of_loader = (9 << 4) | 0;
- boot_params.hdr.ramdisk_image = xen_start_info->mod_start
- ? __pa(xen_start_info->mod_start) : 0;
+ boot_params.hdr.ramdisk_image = initrd_start;
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e8a1201c3293..f62af7647ec9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
- } else {
- /*
- * Paramount to do this test _after_ the
- * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
- * IDENTITY_FRAME_BIT resolves to true.
- */
- mfn &= ~FOREIGN_FRAME_BIT;
- if (mfn & IDENTITY_FRAME_BIT) {
- mfn &= ~IDENTITY_FRAME_BIT;
- flags |= _PAGE_IOMAP;
- }
- }
+ } else
+ mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
return val;
}
-static pteval_t iomap_pte(pteval_t val)
-{
- if (val & _PAGE_PRESENT) {
- unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
- pteval_t flags = val & PTE_FLAGS_MASK;
-
- /* We assume the pte frame number is a MFN, so
- just use it as-is. */
- val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
- }
-
- return val;
-}
-
__visible pteval_t xen_pte_val(pte_t pte)
{
pteval_t pteval = pte.pte;
@@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte)
pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
}
#endif
- if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
- return pteval;
-
return pte_mfn_to_pfn(pteval);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
__visible pte_t xen_make_pte(pteval_t pte)
{
- phys_addr_t addr = (pte & PTE_PFN_MASK);
#if 0
/* If Linux is trying to set a WC pte, then map to the Xen WC.
* If _PAGE_PAT is set, then it probably means it is really
@@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte)
pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
}
#endif
- /*
- * Unprivileged domains are allowed to do IOMAPpings for
- * PCI passthrough, but not map ISA space. The ISA
- * mappings are just dummy local mappings to keep other
- * parts of the kernel happy.
- */
- if (unlikely(pte & _PAGE_IOMAP) &&
- (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
- pte = iomap_pte(pte);
- } else {
- pte &= ~_PAGE_IOMAP;
- pte = pte_pfn_to_mfn(pte);
- }
+ pte = pte_pfn_to_mfn(pte);
return native_make_pte(pte);
}
@@ -1866,12 +1826,11 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
*
* We can construct this by grafting the Xen provided pagetable into
* head_64.S's preconstructed pagetables. We copy the Xen L2's into
- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This
- * means that only the kernel has a physical mapping to start with -
- * but that's enough to get __va working. We need to fill in the rest
- * of the physical mapping once some sort of allocator has been set
- * up.
- * NOTE: for PVH, the page tables are native.
+ * level2_ident_pgt, and level2_kernel_pgt. This means that only the
+ * kernel has a physical mapping to start with - but that's enough to
+ * get __va working. We need to fill in the rest of the physical
+ * mapping once some sort of allocator has been set up. NOTE: for
+ * PVH, the page tables are native.
*/
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
@@ -1902,8 +1861,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* L3_i[0] -> level2_ident_pgt */
convert_pfn_mfn(level3_ident_pgt);
/* L3_k[510] -> level2_kernel_pgt
- * L3_i[511] -> level2_fixmap_pgt */
+ * L3_k[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt);
+
+ /* L3_k[511][506] -> level1_fixmap_pgt */
+ convert_pfn_mfn(level2_fixmap_pgt);
}
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
@@ -1913,21 +1875,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
addr[1] = (unsigned long)l3;
addr[2] = (unsigned long)l2;
/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
- * Both L4[272][0] and L4[511][511] have entries that point to the same
+ * Both L4[272][0] and L4[511][510] have entries that point to the same
* L2 (PMD) tables. Meaning that if you modify it in __va space
* it will be also modified in the __ka space! (But if you just
* modify the PMD table to point to other PTE's or none, then you
* are OK - which is what cleanup_highmap does) */
copy_page(level2_ident_pgt, l2);
- /* Graft it onto L4[511][511] */
+ /* Graft it onto L4[511][510] */
copy_page(level2_kernel_pgt, l2);
- /* Get [511][510] and graft that in level2_fixmap_pgt */
- l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
- l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
- copy_page(level2_fixmap_pgt, l2);
- /* Note that we don't do anything with level1_fixmap_pgt which
- * we don't need. */
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Make pagetable pieces RO */
set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
@@ -1937,6 +1893,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+ set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
@@ -2094,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
default:
/* By default, set_fixmap is used for hardware mappings */
- pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+ pte = mfn_pte(phys, prot);
break;
}
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 0d82003e76ad..ea54a08d8301 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -54,7 +54,7 @@ DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
void xen_mc_flush(void)
{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+ struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
struct multicall_entry *mc;
int ret = 0;
unsigned long flags;
@@ -131,7 +131,7 @@ void xen_mc_flush(void)
struct multicall_space __xen_mc_entry(size_t args)
{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+ struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
struct multicall_space ret;
unsigned argidx = roundup(b->argidx, sizeof(u64));
@@ -162,7 +162,7 @@ struct multicall_space __xen_mc_entry(size_t args)
struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+ struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
struct multicall_space ret = { NULL, NULL };
BUG_ON(preemptible());
@@ -192,7 +192,7 @@ out:
void xen_mc_callback(void (*fn)(void *), void *data)
{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+ struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
struct callback *cb;
if (b->cbidx == MC_BATCH) {
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 3172692381ae..9f5983b01ed9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -173,6 +173,7 @@
#include <xen/balloon.h>
#include <xen/grant_table.h>
+#include "p2m.h"
#include "multicalls.h"
#include "xen-ops.h"
@@ -180,12 +181,6 @@ static void __init m2p_override_init(void);
unsigned long xen_max_p2m_pfn __read_mostly;
-#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
@@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-/* We might hit two boundary violations at the start and end, at max each
- * boundary violation will require three middle nodes. */
-RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
-
-/* When we populate back during bootup, the amount of pages can vary. The
- * max we have is seen is 395979, but that does not mean it can't be more.
- * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
- * it can re-use Xen provided mfn_list array, so we only need to allocate at
- * most three P2M top nodes. */
-RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+/* For each I/O range remapped we may lose up to two leaf pages for the boundary
+ * violations and three mid pages to cover up to 3GB. With
+ * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
+ * remapped region.
+ */
+RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
static inline unsigned p2m_top_index(unsigned long pfn)
{
diff --git a/arch/x86/xen/p2m.h b/arch/x86/xen/p2m.h
new file mode 100644
index 000000000000..ad8aee24ab72
--- /dev/null
+++ b/arch/x86/xen/p2m.h
@@ -0,0 +1,15 @@
+#ifndef _XEN_P2M_H
+#define _XEN_P2M_H
+
+#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
+#define MAX_REMAP_RANGES 10
+
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);
+
+#endif /* _XEN_P2M_H */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2e555163c2fe..af7216128d93 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -29,6 +29,7 @@
#include <xen/features.h>
#include "xen-ops.h"
#include "vdso.h"
+#include "p2m.h"
/* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[];
@@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;
+/* Buffer used to remap identity mapped pages */
+unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
+
/*
* The maximum amount of extra memory compared to the base size. The
* main scaling factor is the size of struct page. At extreme ratios
@@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start,
return len;
}
-static unsigned long __init xen_release_chunk(unsigned long start,
- unsigned long end)
-{
- return xen_do_chunk(start, end, true);
-}
-
-static unsigned long __init xen_populate_chunk(
+/*
+ * Finds the next RAM pfn available in the E820 map after min_pfn.
+ * This function updates min_pfn with the pfn found and returns
+ * the size of that range or zero if not found.
+ */
+static unsigned long __init xen_find_pfn_range(
const struct e820entry *list, size_t map_size,
- unsigned long max_pfn, unsigned long *last_pfn,
- unsigned long credits_left)
+ unsigned long *min_pfn)
{
const struct e820entry *entry;
unsigned int i;
unsigned long done = 0;
- unsigned long dest_pfn;
for (i = 0, entry = list; i < map_size; i++, entry++) {
unsigned long s_pfn;
unsigned long e_pfn;
- unsigned long pfns;
- long capacity;
-
- if (credits_left <= 0)
- break;
if (entry->type != E820_RAM)
continue;
e_pfn = PFN_DOWN(entry->addr + entry->size);
- /* We only care about E820 after the xen_start_info->nr_pages */
- if (e_pfn <= max_pfn)
+ /* We only care about E820 after this */
+ if (e_pfn < *min_pfn)
continue;
s_pfn = PFN_UP(entry->addr);
- /* If the E820 falls within the nr_pages, we want to start
- * at the nr_pages PFN.
- * If that would mean going past the E820 entry, skip it
+
+ /* If min_pfn falls within the E820 entry, we want to start
+ * at the min_pfn PFN.
*/
- if (s_pfn <= max_pfn) {
- capacity = e_pfn - max_pfn;
- dest_pfn = max_pfn;
+ if (s_pfn <= *min_pfn) {
+ done = e_pfn - *min_pfn;
} else {
- capacity = e_pfn - s_pfn;
- dest_pfn = s_pfn;
+ done = e_pfn - s_pfn;
+ *min_pfn = s_pfn;
}
+ break;
+ }
- if (credits_left < capacity)
- capacity = credits_left;
+ return done;
+}
- pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
- done += pfns;
- *last_pfn = (dest_pfn + pfns);
- if (pfns < capacity)
- break;
- credits_left -= pfns;
+/*
+ * This releases a chunk of memory and then does the identity map. It's used as
+ * as a fallback if the remapping fails.
+ */
+static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
+ unsigned long *released)
+{
+ WARN_ON(start_pfn > end_pfn);
+
+ /* Need to release pages first */
+ *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
+ *identity += set_phys_range_identity(start_pfn, end_pfn);
+}
+
+/*
+ * Helper function to update both the p2m and m2p tables.
+ */
+static unsigned long __init xen_update_mem_tables(unsigned long pfn,
+ unsigned long mfn)
+{
+ struct mmu_update update = {
+ .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+ .val = pfn
+ };
+
+ /* Update p2m */
+ if (!early_set_phys_to_machine(pfn, mfn)) {
+ WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
+ pfn, mfn);
+ return false;
}
- return done;
+
+ /* Update m2p */
+ if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
+ WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
+ mfn, pfn);
+ return false;
+ }
+
+ return true;
}
-static void __init xen_set_identity_and_release_chunk(
- unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
- unsigned long *released, unsigned long *identity)
+/*
+ * This function updates the p2m and m2p tables with an identity map from
+ * start_pfn to start_pfn+size and remaps the underlying RAM of the original
+ * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
+ * to not exhaust the reserved brk space. Doing it in properly aligned blocks
+ * ensures we only allocate the minimum required leaf pages in the p2m table. It
+ * copies the existing mfns from the p2m table under the 1:1 map, overwrites
+ * them with the identity map and then updates the p2m and m2p tables with the
+ * remapped memory.
+ */
+static unsigned long __init xen_do_set_identity_and_remap_chunk(
+ unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
- unsigned long pfn;
+ unsigned long ident_pfn_iter, remap_pfn_iter;
+ unsigned long ident_start_pfn_align, remap_start_pfn_align;
+ unsigned long ident_end_pfn_align, remap_end_pfn_align;
+ unsigned long ident_boundary_pfn, remap_boundary_pfn;
+ unsigned long ident_cnt = 0;
+ unsigned long remap_cnt = 0;
+ unsigned long left = size;
+ unsigned long mod;
+ int i;
+
+ WARN_ON(size == 0);
+
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
/*
- * If the PFNs are currently mapped, clear the mappings
- * (except for the ISA region which must be 1:1 mapped) to
- * release the refcounts (in Xen) on the original frames.
+ * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
+ * blocks. We need to keep track of both the existing pfn mapping and
+ * the new pfn remapping.
*/
- for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
- pte_t pte = __pte_ma(0);
+ mod = start_pfn % P2M_PER_PAGE;
+ ident_start_pfn_align =
+ mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
+ mod = remap_pfn % P2M_PER_PAGE;
+ remap_start_pfn_align =
+ mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
+ mod = (start_pfn + size) % P2M_PER_PAGE;
+ ident_end_pfn_align = start_pfn + size - mod;
+ mod = (remap_pfn + size) % P2M_PER_PAGE;
+ remap_end_pfn_align = remap_pfn + size - mod;
+
+ /* Iterate over each p2m leaf node in each range */
+ for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
+ ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
+ ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
+ /* Check we aren't past the end */
+ BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
+ BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
+
+ /* Save p2m mappings */
+ for (i = 0; i < P2M_PER_PAGE; i++)
+ xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
+
+ /* Set identity map which will free a p2m leaf */
+ ident_cnt += set_phys_range_identity(ident_pfn_iter,
+ ident_pfn_iter + P2M_PER_PAGE);
+
+#ifdef DEBUG
+ /* Helps verify a p2m leaf has been freed */
+ for (i = 0; i < P2M_PER_PAGE; i++) {
+ unsigned int pfn = ident_pfn_iter + i;
+ BUG_ON(pfn_to_mfn(pfn) != pfn);
+ }
+#endif
+ /* Now remap memory */
+ for (i = 0; i < P2M_PER_PAGE; i++) {
+ unsigned long mfn = xen_remap_buf[i];
+
+ /* This will use the p2m leaf freed above */
+ if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
+ WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+ remap_pfn_iter + i, mfn);
+ return 0;
+ }
+
+ remap_cnt++;
+ }
- if (pfn < PFN_UP(ISA_END_ADDRESS))
- pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+ left -= P2M_PER_PAGE;
+ }
- (void)HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+ /* Max boundary space possible */
+ BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
+
+ /* Now handle the boundary conditions */
+ ident_boundary_pfn = start_pfn;
+ remap_boundary_pfn = remap_pfn;
+ for (i = 0; i < left; i++) {
+ unsigned long mfn;
+
+ /* These two checks move from the start to end boundaries */
+ if (ident_boundary_pfn == ident_start_pfn_align)
+ ident_boundary_pfn = ident_pfn_iter;
+ if (remap_boundary_pfn == remap_start_pfn_align)
+ remap_boundary_pfn = remap_pfn_iter;
+
+ /* Check we aren't past the end */
+ BUG_ON(ident_boundary_pfn >= start_pfn + size);
+ BUG_ON(remap_boundary_pfn >= remap_pfn + size);
+
+ mfn = pfn_to_mfn(ident_boundary_pfn);
+
+ if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
+ WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+ remap_pfn_iter + i, mfn);
+ return 0;
+ }
+ remap_cnt++;
+
+ ident_boundary_pfn++;
+ remap_boundary_pfn++;
}
- if (start_pfn < nr_pages)
- *released += xen_release_chunk(
- start_pfn, min(end_pfn, nr_pages));
+ /* Finish up the identity map */
+ if (ident_start_pfn_align >= ident_end_pfn_align) {
+ /*
+ * In this case we have an identity range which does not span an
+ * aligned block so everything needs to be identity mapped here.
+ * If we didn't check this we might remap too many pages since
+ * the align boundaries are not meaningful in this case.
+ */
+ ident_cnt += set_phys_range_identity(start_pfn,
+ start_pfn + size);
+ } else {
+ /* Remapped above so check each end of the chunk */
+ if (start_pfn < ident_start_pfn_align)
+ ident_cnt += set_phys_range_identity(start_pfn,
+ ident_start_pfn_align);
+ if (start_pfn + size > ident_pfn_iter)
+ ident_cnt += set_phys_range_identity(ident_pfn_iter,
+ start_pfn + size);
+ }
- *identity += set_phys_range_identity(start_pfn, end_pfn);
+ BUG_ON(ident_cnt != size);
+ BUG_ON(remap_cnt != size);
+
+ return size;
}
-static unsigned long __init xen_set_identity_and_release(
- const struct e820entry *list, size_t map_size, unsigned long nr_pages)
+/*
+ * This function takes a contiguous pfn range that needs to be identity mapped
+ * and:
+ *
+ * 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
+ * 2) Calls the do_ function to actually do the mapping/remapping work.
+ *
+ * The goal is to not allocate additional memory but to remap the existing
+ * pages. In the case of an error the underlying memory is simply released back
+ * to Xen and not remapped.
+ */
+static unsigned long __init xen_set_identity_and_remap_chunk(
+ const struct e820entry *list, size_t map_size, unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
+ unsigned long *identity, unsigned long *remapped,
+ unsigned long *released)
+{
+ unsigned long pfn;
+ unsigned long i = 0;
+ unsigned long n = end_pfn - start_pfn;
+
+ while (i < n) {
+ unsigned long cur_pfn = start_pfn + i;
+ unsigned long left = n - i;
+ unsigned long size = left;
+ unsigned long remap_range_size;
+
+ /* Do not remap pages beyond the current allocation */
+ if (cur_pfn >= nr_pages) {
+ /* Identity map remaining pages */
+ *identity += set_phys_range_identity(cur_pfn,
+ cur_pfn + size);
+ break;
+ }
+ if (cur_pfn + size > nr_pages)
+ size = nr_pages - cur_pfn;
+
+ remap_range_size = xen_find_pfn_range(list, map_size,
+ &remap_pfn);
+ if (!remap_range_size) {
+ pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+ xen_set_identity_and_release_chunk(cur_pfn,
+ cur_pfn + left, nr_pages, identity, released);
+ break;
+ }
+ /* Adjust size to fit in current e820 RAM region */
+ if (size > remap_range_size)
+ size = remap_range_size;
+
+ if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
+ WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
+ cur_pfn, size, remap_pfn);
+ xen_set_identity_and_release_chunk(cur_pfn,
+ cur_pfn + left, nr_pages, identity, released);
+ break;
+ }
+
+ /* Update variables to reflect new mappings. */
+ i += size;
+ remap_pfn += size;
+ *identity += size;
+ *remapped += size;
+ }
+
+ /*
+ * If the PFNs are currently mapped, the VA mapping also needs
+ * to be updated to be 1:1.
+ */
+ for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+ (void)HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+
+ return remap_pfn;
+}
+
+static unsigned long __init xen_set_identity_and_remap(
+ const struct e820entry *list, size_t map_size, unsigned long nr_pages,
+ unsigned long *released)
{
phys_addr_t start = 0;
- unsigned long released = 0;
unsigned long identity = 0;
+ unsigned long remapped = 0;
+ unsigned long last_pfn = nr_pages;
const struct e820entry *entry;
+ unsigned long num_released = 0;
int i;
/*
* Combine non-RAM regions and gaps until a RAM region (or the
* end of the map) is reached, then set the 1:1 map and
- * release the pages (if available) in those non-RAM regions.
+ * remap the memory in those non-RAM regions.
*
* The combined non-RAM regions are rounded to a whole number
* of pages so any partial pages are accessible via the 1:1
@@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release(
end_pfn = PFN_UP(entry->addr);
if (start_pfn < end_pfn)
- xen_set_identity_and_release_chunk(
- start_pfn, end_pfn, nr_pages,
- &released, &identity);
-
+ last_pfn = xen_set_identity_and_remap_chunk(
+ list, map_size, start_pfn,
+ end_pfn, nr_pages, last_pfn,
+ &identity, &remapped,
+ &num_released);
start = end;
}
}
- if (released)
- printk(KERN_INFO "Released %lu pages of unused memory\n", released);
- if (identity)
- printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+ *released = num_released;
- return released;
-}
+ pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
+ pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
+ last_pfn);
+ pr_info("Released %ld page(s)\n", num_released);
+ return last_pfn;
+}
static unsigned long __init xen_get_max_pages(void)
{
unsigned long max_pages = MAX_DOMAIN_PAGES;
@@ -347,7 +571,6 @@ char * __init xen_memory_setup(void)
unsigned long max_pages;
unsigned long last_pfn = 0;
unsigned long extra_pages = 0;
- unsigned long populated;
int i;
int op;
@@ -392,20 +615,11 @@ char * __init xen_memory_setup(void)
extra_pages += max_pages - max_pfn;
/*
- * Set P2M for all non-RAM pages and E820 gaps to be identity
- * type PFNs. Any RAM pages that would be made inaccesible by
- * this are first released.
+ * Set identity map on non-RAM pages and remap the underlying RAM.
*/
- xen_released_pages = xen_set_identity_and_release(
- map, memmap.nr_entries, max_pfn);
-
- /*
- * Populate back the non-RAM pages and E820 gaps that had been
- * released. */
- populated = xen_populate_chunk(map, memmap.nr_entries,
- max_pfn, &last_pfn, xen_released_pages);
+ last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
+ &xen_released_pages);
- xen_released_pages -= populated;
extra_pages += xen_released_pages;
if (last_pfn > max_pfn) {
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7005974c3ff3..8650cdb53209 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -37,6 +37,7 @@
#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
+#include "smp.h"
cpumask_var_t xen_cpu_initialized_map;
@@ -99,10 +100,14 @@ static void cpu_bringup(void)
wmb(); /* make sure everything is out */
}
-/* Note: cpu parameter is only relevant for PVH */
-static void cpu_bringup_and_idle(int cpu)
+/*
+ * Note: cpu parameter is only relevant for PVH. The reason for passing it
+ * is we can't do smp_processor_id until the percpu segments are loaded, for
+ * which we need the cpu number! So we pass it in rdi as first parameter.
+ */
+asmlinkage __visible void cpu_bringup_and_idle(int cpu)
{
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_XEN_PVH
if (xen_feature(XENFEAT_auto_translated_physmap) &&
xen_feature(XENFEAT_supervisor_mode_kernel))
xen_pvh_secondary_vcpu_init(cpu);
@@ -360,6 +365,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
struct desc_struct *gdt;
unsigned long gdt_mfn;
+ /* used to tell cpu_init() that it can proceed with initialization */
+ cpumask_set_cpu(cpu, cpu_callout_mask);
if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
return 0;
@@ -374,11 +381,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
- ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
@@ -413,15 +419,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
(unsigned long)xen_failsafe_callback;
ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
-#ifdef CONFIG_X86_32
}
-#else
- } else
- /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
- * %rdi having the cpu number - which means are passing in
- * as the first parameter the cpu. Subtle!
+#ifdef CONFIG_XEN_PVH
+ else {
+ /*
+ * The vcpu comes on kernel page tables which have the NX pte
+ * bit set. This means before DS/SS is touched, NX in
+ * EFER must be set. Hence the following assembly glue code.
*/
+ ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
ctxt->user_regs.rdi = cpu;
+ ctxt->user_regs.rsi = true; /* entry == true */
+ }
#endif
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index c7c2d89efd76..963d62a35c82 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector);
extern void xen_send_IPI_all(int vector);
extern void xen_send_IPI_self(int vector);
+#ifdef CONFIG_XEN_PVH
+extern void xen_pvh_early_cpu_init(int cpu, bool entry);
+#else
+static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
+{
+}
+#endif
+
#endif
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 0ba5f3b967f0..23b45eb9a89c 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -109,7 +109,7 @@ static bool xen_pvspin = true;
__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
{
int irq = __this_cpu_read(lock_kicker_irq);
- struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
+ struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
int cpu = smp_processor_id();
u64 start;
unsigned long flags;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 5718b0b58b60..a1d430b112b3 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -80,7 +80,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
BUG_ON(preemptible());
- state = &__get_cpu_var(xen_runstate);
+ state = this_cpu_ptr(&xen_runstate);
/*
* The runstate info is always updated by the hypervisor on
@@ -123,7 +123,7 @@ static void do_stolen_accounting(void)
WARN_ON(state.state != RUNSTATE_running);
- snap = &__get_cpu_var(xen_runstate_snapshot);
+ snap = this_cpu_ptr(&xen_runstate_snapshot);
/* work out how much time the VCPU has not been runn*ing* */
runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
@@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void)
cycle_t ret;
preempt_disable_notrace();
- src = &__get_cpu_var(xen_vcpu)->time;
+ src = this_cpu_ptr(&xen_vcpu->time);
ret = pvclock_clocksource_read(src);
preempt_enable_notrace();
return ret;
@@ -397,7 +397,7 @@ static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
- struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt;
+ struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
irqreturn_t ret;
ret = IRQ_NONE;
@@ -460,7 +460,7 @@ void xen_setup_cpu_clockevents(void)
{
BUG_ON(preemptible());
- clockevents_register_device(&__get_cpu_var(xen_clock_events).evt);
+ clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
}
void xen_timer_resume(void)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 485b69585540..674b222544b7 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -47,6 +47,41 @@ ENTRY(startup_xen)
__FINIT
+#ifdef CONFIG_XEN_PVH
+/*
+ * xen_pvh_early_cpu_init() - early PVH VCPU initialization
+ * @cpu: this cpu number (%rdi)
+ * @entry: true if this is a secondary vcpu coming up on this entry
+ * point, false if this is the boot CPU being initialized for
+ * the first time (%rsi)
+ *
+ * Note: This is called as a function on the boot CPU, and is the entry point
+ * on the secondary CPU.
+ */
+ENTRY(xen_pvh_early_cpu_init)
+ mov %rsi, %r11
+
+ /* Gather features to see if NX implemented. */
+ mov $0x80000001, %eax
+ cpuid
+ mov %edx, %esi
+
+ mov $MSR_EFER, %ecx
+ rdmsr
+ bts $_EFER_SCE, %eax
+
+ bt $20, %esi
+ jnc 1f /* No NX, skip setting it */
+ bts $_EFER_NX, %eax
+1: wrmsr
+#ifdef CONFIG_SMP
+ cmp $0, %r11b
+ jne cpu_bringup_and_idle
+#endif
+ ret
+
+#endif /* CONFIG_XEN_PVH */
+
.pushsection .text
.balign PAGE_SIZE
ENTRY(hypercall_page)
@@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6)
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
+ ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN, .long 1)
ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START)
ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0)