From 1c5de1939c204bde9cce87f4eb3d26e9f9eb732b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:49 -0800 Subject: xen: add privcmd driver The privcmd interface in xenfs allows the tool stack in the privileged domain to get fairly direct access to the hypervisor in order to do various management things such as domain construction. [ Impact: new xenfs interface for privileged operations ] Signed-off-by: Jeremy Fitzhardinge --- include/xen/Kbuild | 1 + include/xen/privcmd.h | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 include/xen/privcmd.h (limited to 'include/xen') diff --git a/include/xen/Kbuild b/include/xen/Kbuild index 4e65c16a445b..84ad8f02fee5 100644 --- a/include/xen/Kbuild +++ b/include/xen/Kbuild @@ -1 +1,2 @@ header-y += evtchn.h +header-y += privcmd.h diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h new file mode 100644 index 000000000000..b42cdfd92fee --- /dev/null +++ b/include/xen/privcmd.h @@ -0,0 +1,80 @@ +/****************************************************************************** + * privcmd.h + * + * Interface to /proc/xen/privcmd. 
+ * + * Copyright (c) 2003-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __LINUX_PUBLIC_PRIVCMD_H__ +#define __LINUX_PUBLIC_PRIVCMD_H__ + +#include <linux/types.h> + +typedef unsigned long xen_pfn_t; + +#ifndef __user +#define __user +#endif + +struct privcmd_hypercall { + __u64 op; + __u64 arg[5]; +}; + +struct privcmd_mmap_entry { + __u64 va; + __u64 mfn; + __u64 npages; +}; + +struct privcmd_mmap { + int num; + domid_t dom; /* target domain */ + struct privcmd_mmap_entry __user *entry; +}; + +struct privcmd_mmapbatch { + int num; /* number of pages to populate */ + domid_t dom; /* target domain */ + __u64 addr; /* virtual address */ + xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ +}; + +/* + * @cmd: IOCTL_PRIVCMD_HYPERCALL + * @arg: &privcmd_hypercall_t + * Return: Value returned from execution of the specified hypercall. + */ +#define IOCTL_PRIVCMD_HYPERCALL \ + _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) +#define IOCTL_PRIVCMD_MMAP \ + _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) +#define IOCTL_PRIVCMD_MMAPBATCH \ + _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) + +#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ -- cgit v1.2.3-59-g8ed1b From de1ef2065c4675ab1062ebc8d1cb6c5f42b61d04 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 21 May 2009 10:09:46 +0100 Subject: xen/privcmd: move remap_domain_mfn_range() to core xen code and export. This allows xenfs to be built as a module, previously it required flush_tlb_all and arbitrary_virt_to_machine to be exported.
Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 66 ++++++++++++++++++++++++++++++++++++ drivers/xen/xenfs/privcmd.c | 81 +++++---------------------------------------- include/xen/xen-ops.h | 5 +++ 3 files changed, 79 insertions(+), 73 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 1ceb0f2fa0af..f08ea045620f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2265,6 +2265,72 @@ void __init xen_hvm_init_mmu_ops(void) } #endif +#define REMAP_BATCH_SIZE 16 + +struct remap_data { + unsigned long mfn; + pgprot_t prot; + struct mmu_update *mmu_update; +}; + +static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + struct remap_data *rmd = data; + pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); + + rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; + rmd->mmu_update->val = pte_val_ma(pte); + rmd->mmu_update++; + + return 0; +} + +int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long mfn, int nr, + pgprot_t prot, unsigned domid) +{ + struct remap_data rmd; + struct mmu_update mmu_update[REMAP_BATCH_SIZE]; + int batch; + unsigned long range; + int err = 0; + + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); + + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + + rmd.mfn = mfn; + rmd.prot = prot; + + while (nr) { + batch = min(REMAP_BATCH_SIZE, nr); + range = (unsigned long)batch << PAGE_SHIFT; + + rmd.mmu_update = mmu_update; + err = apply_to_page_range(vma->vm_mm, addr, range, + remap_area_mfn_pte_fn, &rmd); + if (err) + goto out; + + err = -EFAULT; + if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) + goto out; + + nr -= batch; + addr += range; + } + + err = 0; +out: + + flush_tlb_all(); + + return err; +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); + #ifdef CONFIG_XEN_DEBUG_FS static struct dentry *d_mmu_debug; diff --git 
a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index 438223ae0fc3..f80be7f6eb95 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -31,76 +31,12 @@ #include #include #include - -#define REMAP_BATCH_SIZE 16 +#include <xen/xen-ops.h> #ifndef HAVE_ARCH_PRIVCMD_MMAP static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); #endif -struct remap_data { - unsigned long mfn; - pgprot_t prot; - struct mmu_update *mmu_update; -}; - -static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) -{ - struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); - - rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; - rmd->mmu_update->val = pte_val_ma(pte); - rmd->mmu_update++; - - return 0; -} - -static int remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - unsigned long mfn, int nr, - pgprot_t prot, unsigned domid) -{ - struct remap_data rmd; - struct mmu_update mmu_update[REMAP_BATCH_SIZE]; - int batch; - unsigned long range; - int err = 0; - - prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; - - rmd.mfn = mfn; - rmd.prot = prot; - - while (nr) { - batch = min(REMAP_BATCH_SIZE, nr); - range = (unsigned long)batch << PAGE_SHIFT; - - rmd.mmu_update = mmu_update; - err = apply_to_page_range(vma->vm_mm, addr, range, - remap_area_mfn_pte_fn, &rmd); - if (err) - goto out; - - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) - goto out; - - nr -= batch; - addr += range; - } - - err = 0; -out: - - flush_tlb_all(); - - return err; -} - static long privcmd_ioctl_hypercall(void __user *udata) { struct privcmd_hypercall hypercall; @@ -233,11 +169,11 @@ static int mmap_mfn_range(void *data, void *state) ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) return -EINVAL; - rc = remap_domain_mfn_range(vma, - msg->va & PAGE_MASK, - msg->mfn, msg->npages, - vma->vm_page_prot, -
st->domain); + rc = xen_remap_domain_mfn_range(vma, + msg->va & PAGE_MASK, + msg->mfn, msg->npages, + vma->vm_page_prot, + st->domain); if (rc < 0) return rc; @@ -315,9 +251,8 @@ static int mmap_batch_fn(void *data, void *state) xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; - if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, - *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { + if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain) < 0) { *mfnp |= 0xf0000000U; st->err++; } diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 351f4051f6d8..98b92154a264 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -23,4 +23,9 @@ int xen_create_contiguous_region(unsigned long vstart, unsigned int order, void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); +int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long mfn, int nr, + pgprot_t prot, unsigned domid); + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3-59-g8ed1b From 35ae11fd146384d222f3bb1f17eed1970cc92c36 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:09:48 -0800 Subject: xen: Use host-provided E820 map Rather than simply using a flat memory map from Xen, use its provided E820 map. This allows the domain builder to tell the domain to reserve space for more pages than those initially provided at domain-build time. It also allows the host to specify holes in the address space (for PCI-passthrough, for example). 
Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/setup.c | 38 ++++++++++++++++++++++++++++++++++++-- include/xen/interface/memory.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 328b00305426..dd2eb2a9303f 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -107,13 +108,46 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, char * __init xen_memory_setup(void) { + static struct e820entry map[E820MAX] __initdata; + unsigned long max_pfn = xen_start_info->nr_pages; + unsigned long long mem_end; + int rc; + struct xen_memory_map memmap; + int i; max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); + mem_end = PFN_PHYS(max_pfn); + + memmap.nr_entries = E820MAX; + set_xen_guest_handle(memmap.buffer, map); + + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if (rc == -ENOSYS) { + memmap.nr_entries = 1; + map[0].addr = 0ULL; + map[0].size = mem_end; + /* 8MB slack (to balance backend allocations). */ + map[0].size += 8ULL << 20; + map[0].type = E820_RAM; + rc = 0; + } + BUG_ON(rc); e820.nr_map = 0; - - e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM); + for (i = 0; i < memmap.nr_entries; i++) { + unsigned long long end = map[i].addr + map[i].size; + if (map[i].type == E820_RAM) { + if (map[i].addr > mem_end) + continue; + if (end > mem_end) { + /* Truncate region to max_mem. 
*/ + map[i].size -= end - mem_end; + } + } + if (map[i].size > 0) + e820_add_region(map[i].addr, map[i].size, map[i].type); + } /* * Even though this is normal, usable memory under Xen, reserve diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index d3938d3e71f8..d7a6c13bde69 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -186,6 +186,35 @@ struct xen_translate_gpfn_list { }; DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of struct xen_memory_map. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + GUEST_HANDLE(void) buffer; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of struct xen_memory_map. + */ +#define XENMEM_machine_memory_map 10 + /* * Prevent the balloon driver from changing the memory reservation -- cgit v1.2.3-59-g8ed1b