From 667a0a06c99d5291433b869ed35dabdd95ba1453 Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Thu, 18 Dec 2014 14:48:15 +0000
Subject: mm: provide a find_special_page vma operation

The optional find_special_page VMA operation is used to look up the
pages backing a VMA.  This is useful in cases where the normal
mechanisms for finding the page don't work.  This is only called if
the PTE is special.

One use case is a Xen PV guest mapping foreign pages into userspace.

In a Xen PV guest, the PTEs contain MFNs so get_user_pages() (for
example) must do an MFN to PFN (M2P) lookup before it can get the
page.  For foreign pages (those owned by another guest) the M2P
lookup returns the PFN as seen by the foreign guest (which would be
completely the wrong page for the local guest).

This cannot be fixed by improving the M2P lookup, since one MFN may be
mapped onto two or more pages, so getting the right page is impossible
given just the MFN.

Signed-off-by: David Vrabel
Acked-by: Andrew Morton
---
 include/linux/mm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80fc92a49649..9269af7349fe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -290,6 +290,14 @@ struct vm_operations_struct {
 	/* called by sys_remap_file_pages() to populate non-linear mapping */
 	int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
 			   unsigned long size, pgoff_t pgoff);
+
+	/*
+	 * Called by vm_normal_page() for special PTEs to find the
+	 * page for @addr.  This is useful if the default behavior
+	 * (using pte_page()) would not find the correct page.
+	 */
+	struct page *(*find_special_page)(struct vm_area_struct *vma,
+					  unsigned long addr);
 };
 
 struct mmu_gather;
-- 
cgit v1.2.3-59-g8ed1b


From d8ac3dd41aea245f65465449efc35dd3ac71e91d Mon Sep 17 00:00:00 2001
From: Jennifer Herbert
Date: Mon, 5 Jan 2015 13:24:09 +0000
Subject: mm: add 'foreign' alias for the 'pinned' page flag

The foreign page flag will be used by Xen guests to mark pages that
have grant mappings of frames from other (foreign) guests.

The foreign flag is an alias for the existing (Xen-specific) pinned
flag.  This is safe because pinned is only used on pages used for
page tables and these cannot also be foreign.

Signed-off-by: Jennifer Herbert
Acked-by: Andrew Morton
Signed-off-by: David Vrabel
---
 include/linux/page-flags.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e1f5fcd79792..5ed7bdaf22d5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -121,8 +121,12 @@ enum pageflags {
 	PG_fscache = PG_private_2,	/* page backed by cache */
 
 	/* XEN */
+	/* Pinned in Xen as a read-only pagetable page. */
 	PG_pinned = PG_owner_priv_1,
+	/* Pinned as part of domain save (see xen_mm_pin_all()). */
 	PG_savepinned = PG_dirty,
+	/* Has a grant mapping of another (foreign) domain's page. */
+	PG_foreign = PG_owner_priv_1,
 
 	/* SLOB */
 	PG_slob_free = PG_private,
@@ -215,6 +219,7 @@ __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)		/* Used by some filesystems */
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
 PAGEFLAG(SavePinned, savepinned);			/* Xen */
+PAGEFLAG(Foreign, foreign);				/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 	__SETPAGEFLAG(SwapBacked, swapbacked)
-- 
cgit v1.2.3-59-g8ed1b


From 853d0289340026b30f93fd0e768340221d4e605c Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Mon, 5 Jan 2015 14:13:41 +0000
Subject: xen/grant-table: pre-populate kernel unmap ops for xen_gnttab_unmap_refs()

When unmapping grants, instead of converting the kernel map ops to
unmap ops on the fly, pre-populate the set of unmap ops.

This allows the grant unmap for the kernel mappings to be trivially
batched in the future.

Signed-off-by: David Vrabel
Reviewed-by: Stefano Stabellini
---
 arch/arm/include/asm/xen/page.h |  2 +-
 arch/arm/xen/p2m.c              |  2 +-
 arch/x86/include/asm/xen/page.h |  2 +-
 arch/x86/xen/p2m.c              | 21 ++++++++++-----------
 drivers/xen/gntdev.c            | 20 ++++++++++++++------
 drivers/xen/grant-table.c       |  4 ++--
 include/xen/grant_table.h       |  2 +-
 7 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 68c739b3fdf4..2f7e6ff67d51 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -92,7 +92,7 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct page **pages, unsigned int count);
 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
-				     struct gnttab_map_grant_ref *kmap_ops,
+				     struct gnttab_unmap_grant_ref *kunmap_ops,
 				     struct page **pages, unsigned int count);
 bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index 054857776254..cb7a14c5cd69 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -102,7 +102,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
 
 int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
-			      struct gnttab_map_grant_ref *kmap_ops,
+			      struct gnttab_unmap_grant_ref *kunmap_ops,
 			      struct page **pages, unsigned int count)
 {
 	int i;
 
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 5eea09915a15..e9f52fe2d56a 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -55,7 +55,7 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
 				   struct page **pages, unsigned int count);
 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
-				     struct gnttab_map_grant_ref *kmap_ops,
+				     struct gnttab_unmap_grant_ref *kunmap_ops,
 				     struct page **pages, unsigned int count);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 70fb5075c901..df40b2888eae 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -816,7 +816,7 @@ static struct page *m2p_find_override(unsigned long mfn)
 }
 
 static int m2p_remove_override(struct page *page,
-			       struct gnttab_map_grant_ref *kmap_op,
+			       struct gnttab_unmap_grant_ref *kunmap_op,
 			       unsigned long mfn)
 {
 	unsigned long flags;
@@ -840,7 +840,7 @@ static int m2p_remove_override(struct page *page,
 	list_del(&page->lru);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
 
-	if (kmap_op != NULL) {
+	if (kunmap_op != NULL) {
 		if (!PageHighMem(page)) {
 			struct multicall_space mcs;
 			struct gnttab_unmap_and_replace *unmap_op;
@@ -855,13 +855,13 @@ static int m2p_remove_override(struct page *page,
 			 * issued. In this case handle is going to -1 because
 			 * it hasn't been modified yet.
 			 */
-			if (kmap_op->handle == -1)
+			if (kunmap_op->handle == -1)
 				xen_mc_flush();
 			/*
 			 * Now if kmap_op->handle is negative it means that the
 			 * hypercall actually returned an error.
 			 */
-			if (kmap_op->handle == GNTST_general_error) {
+			if (kunmap_op->handle == GNTST_general_error) {
 				pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
 					pfn, mfn);
 				put_balloon_scratch_page();
@@ -873,9 +873,9 @@ static int m2p_remove_override(struct page *page,
 			mcs = __xen_mc_entry(
 				sizeof(struct gnttab_unmap_and_replace));
 			unmap_op = mcs.args;
-			unmap_op->host_addr = kmap_op->host_addr;
+			unmap_op->host_addr = kunmap_op->host_addr;
 			unmap_op->new_addr = scratch_page_address;
-			unmap_op->handle = kmap_op->handle;
+			unmap_op->handle = kunmap_op->handle;
 
 			MULTI_grant_table_op(mcs.mc,
 				GNTTABOP_unmap_and_replace, unmap_op, 1);
@@ -887,7 +887,6 @@ static int m2p_remove_override(struct page *page,
 
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
 
-			kmap_op->host_addr = 0;
 			put_balloon_scratch_page();
 		}
 	}
@@ -912,7 +911,7 @@ static int m2p_remove_override(struct page *page,
 }
 
 int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
-			      struct gnttab_map_grant_ref *kmap_ops,
+			      struct gnttab_unmap_grant_ref *kunmap_ops,
 			      struct page **pages, unsigned int count)
 {
 	int i, ret = 0;
@@ -921,7 +920,7 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
 
-	if (kmap_ops &&
+	if (kunmap_ops &&
 	    !in_interrupt() &&
 	    paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
 		arch_enter_lazy_mmu_mode();
@@ -942,8 +941,8 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 		ClearPagePrivate(pages[i]);
 		set_phys_to_machine(pfn, pages[i]->index);
 
-		if (kmap_ops)
-			ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
+		if (kunmap_ops)
+			ret = m2p_remove_override(pages[i], &kunmap_ops[i], mfn);
 		if (ret)
 			goto out;
 	}
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 073b4a19a8b0..6444172f2842 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -91,6 +91,7 @@ struct grant_map {
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
 	struct gnttab_map_grant_ref   *kmap_ops;
+	struct gnttab_unmap_grant_ref *kunmap_ops;
 	struct page **pages;
 };
 
@@ -124,6 +125,7 @@ static void gntdev_free_map(struct grant_map *map)
 	kfree(map->map_ops);
 	kfree(map->unmap_ops);
 	kfree(map->kmap_ops);
+	kfree(map->kunmap_ops);
 	kfree(map);
 }
 
@@ -140,11 +142,13 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	add->map_ops   = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
 	add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
 	add->kmap_ops  = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
+	add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
 	add->pages     = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
 	if (NULL == add->grants    ||
 	    NULL == add->map_ops   ||
 	    NULL == add->unmap_ops ||
 	    NULL == add->kmap_ops  ||
+	    NULL == add->kunmap_ops ||
 	    NULL == add->pages)
 		goto err;
 
@@ -155,6 +159,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 		add->map_ops[i].handle = -1;
 		add->unmap_ops[i].handle = -1;
 		add->kmap_ops[i].handle = -1;
+		add->kunmap_ops[i].handle = -1;
 	}
 
 	add->index = 0;
@@ -280,6 +285,8 @@ static int map_grant_pages(struct grant_map *map)
 					map->flags | GNTMAP_host_map,
 					map->grants[i].ref,
 					map->grants[i].domid);
+			gnttab_set_unmap_op(&map->kunmap_ops[i], address,
+					map->flags | GNTMAP_host_map, -1);
 		}
 	}
 
@@ -290,13 +297,14 @@ static int map_grant_pages(struct grant_map *map)
 		return err;
 
 	for (i = 0; i < map->count; i++) {
-		if (map->map_ops[i].status)
+		if (map->map_ops[i].status) {
 			err = -EINVAL;
-		else {
-			BUG_ON(map->map_ops[i].handle == -1);
-			map->unmap_ops[i].handle = map->map_ops[i].handle;
-			pr_debug("map handle=%d\n", map->map_ops[i].handle);
+			continue;
 		}
+
+		map->unmap_ops[i].handle = map->map_ops[i].handle;
+		if (use_ptemod)
+			map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
 	}
 	return err;
 }
@@ -316,7 +324,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 	}
 
 	err = gnttab_unmap_refs(map->unmap_ops + offset,
-			use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset,
+			use_ptemod ? map->kunmap_ops + offset : NULL, map->pages + offset,
 			pages);
 	if (err)
 		return err;
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7786291ba229..999d7abdbcec 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -738,7 +738,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
 
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct gnttab_map_grant_ref *kmap_ops,
+		      struct gnttab_unmap_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count)
 {
 	int ret;
@@ -747,7 +747,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	if (ret)
 		return ret;
 
-	return clear_foreign_p2m_mapping(unmap_ops, kmap_ops, pages, count);
+	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
 
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 3387465b9caa..7235d8f35459 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -167,7 +167,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct gnttab_map_grant_ref *kunmap_ops,
+		      struct gnttab_unmap_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count);
 
 /* Perform a batch of grant map/copy operations. Retry every batch slot
-- 
cgit v1.2.3-59-g8ed1b


From ff4b156f166b3931894d2a8b5cdba6cdf4da0618 Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Thu, 8 Jan 2015 18:06:01 +0000
Subject: xen/grant-table: add helpers for allocating pages

Add gnttab_alloc_pages() and gnttab_free_pages() to allocate/free
pages suitable for grant mapping.
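
As an illustration (this sketch is not part of the patch, and the
batch size of 16 is arbitrary), a typical caller pairs the two
helpers around the map/unmap step:

	struct page *pages[16];
	int err;

	err = gnttab_alloc_pages(16, pages);	/* ballooned frames, safe to grant map */
	if (err)
		return err;

	/* ... fill in map ops and call gnttab_map_refs(..., pages, 16) ... */
	/* ... later, gnttab_unmap_refs(..., pages, 16) ... */

	gnttab_free_pages(16, pages);		/* return the frames to the balloon */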
Signed-off-by: David Vrabel
Reviewed-by: Stefano Stabellini
---
 drivers/block/xen-blkback/blkback.c |  8 ++++----
 drivers/net/xen-netback/interface.c |  7 +++----
 drivers/xen/gntdev.c                |  4 ++--
 drivers/xen/grant-table.c           | 29 +++++++++++++++++++++++++++++
 drivers/xen/xen-scsiback.c          |  6 +++---
 include/xen/grant_table.h           |  3 +++
 6 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 63fc7f06a014..908e630240bd 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -100,7 +100,7 @@ module_param(log_stats, int, 0644);
 
 #define BLKBACK_INVALID_HANDLE (~0)
 
-/* Number of free pages to remove on each call to free_xenballooned_pages */
+/* Number of free pages to remove on each call to gnttab_free_pages */
 #define NUM_BATCH_FREE_PAGES 10
 
 static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
@@ -111,7 +111,7 @@ static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
 	if (list_empty(&blkif->free_pages)) {
 		BUG_ON(blkif->free_pages_num != 0);
 		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
-		return alloc_xenballooned_pages(1, page, false);
+		return gnttab_alloc_pages(1, page);
 	}
 	BUG_ON(blkif->free_pages_num == 0);
 	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
@@ -151,14 +151,14 @@ static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
 		blkif->free_pages_num--;
 		if (++num_pages == NUM_BATCH_FREE_PAGES) {
 			spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
-			free_xenballooned_pages(num_pages, page);
+			gnttab_free_pages(num_pages, page);
 			spin_lock_irqsave(&blkif->free_pages_lock, flags);
 			num_pages = 0;
 		}
 	}
 	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
 	if (num_pages != 0)
-		free_xenballooned_pages(num_pages, page);
+		gnttab_free_pages(num_pages, page);
 }
 
 #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 9259a732e8a4..2e07f8433412 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -483,9 +483,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
 	 * better enable it.  The long term solution would be to use just a
 	 * bunch of valid page descriptors, without dependency on ballooning
 	 */
-	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
-				       queue->mmap_pages,
-				       false);
+	err = gnttab_alloc_pages(MAX_PENDING_REQS,
+				 queue->mmap_pages);
 	if (err) {
 		netdev_err(queue->vif->dev, "Could not reserve mmap_pages\n");
 		return -ENOMEM;
@@ -662,7 +661,7 @@ void xenvif_disconnect(struct xenvif *vif)
  */
 void xenvif_deinit_queue(struct xenvif_queue *queue)
 {
-	free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
+	gnttab_free_pages(MAX_PENDING_REQS, queue->mmap_pages);
 }
 
 void xenvif_free(struct xenvif *vif)
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 6444172f2842..8cc3f069a10f 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -119,7 +119,7 @@ static void gntdev_free_map(struct grant_map *map)
 		return;
 
 	if (map->pages)
-		free_xenballooned_pages(map->count, map->pages);
+		gnttab_free_pages(map->count, map->pages);
 	kfree(map->pages);
 	kfree(map->grants);
 	kfree(map->map_ops);
@@ -152,7 +152,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	    NULL == add->pages)
 		goto err;
 
-	if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
+	if (gnttab_alloc_pages(count, add->pages))
 		goto err;
 
 	for (i = 0; i < count; i++) {
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 999d7abdbcec..b4f93c490f83 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -50,6 +50,7 @@
 #include
 #include
 #include
+#include <xen/balloon.h>
 #include
 #include
 
@@ -671,6 +672,34 @@ void gnttab_free_auto_xlat_frames(void)
 }
 EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
 
+/**
+ * gnttab_alloc_pages - alloc pages suitable for grant mapping
+ * @nr_pages: number of pages to alloc
+ * @pages: returns the pages
+ */
+int gnttab_alloc_pages(int nr_pages, struct page **pages)
+{
+	int ret;
+
+	ret = alloc_xenballooned_pages(nr_pages, pages, false);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL(gnttab_alloc_pages);
+
+/**
+ * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
+ * @nr_pages: number of pages to free
+ * @pages: the pages
+ */
+void gnttab_free_pages(int nr_pages, struct page **pages)
+{
+	free_xenballooned_pages(nr_pages, pages);
+}
+EXPORT_SYMBOL(gnttab_free_pages);
+
 /* Handling of paged out grant targets (GNTST_eagain) */
 #define MAX_DELAY 256
 static inline void
 
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index e999496eda3e..ecd540a7a562 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -227,7 +227,7 @@ static void put_free_pages(struct page **page, int num)
 		return;
 	if (i > scsiback_max_buffer_pages) {
 		n = min(num, i - scsiback_max_buffer_pages);
-		free_xenballooned_pages(n, page + num - n);
+		gnttab_free_pages(n, page + num - n);
 		n = num - n;
 	}
 	spin_lock_irqsave(&free_pages_lock, flags);
@@ -244,7 +244,7 @@ static int get_free_page(struct page **page)
 	spin_lock_irqsave(&free_pages_lock, flags);
 	if (list_empty(&scsiback_free_pages)) {
 		spin_unlock_irqrestore(&free_pages_lock, flags);
-		return alloc_xenballooned_pages(1, page, false);
+		return gnttab_alloc_pages(1, page);
 	}
 	page[0] = list_first_entry(&scsiback_free_pages, struct page, lru);
 	list_del(&page[0]->lru);
@@ -2106,7 +2106,7 @@ static void __exit scsiback_exit(void)
 	while (free_pages_num) {
 		if (get_free_page(&page))
 			BUG();
-		free_xenballooned_pages(1, &page);
+		gnttab_free_pages(1, &page);
 	}
 	scsiback_deregister_configfs();
 	xenbus_unregister_driver(&scsiback_driver);
 
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 7235d8f35459..949803e20872 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -163,6 +163,9 @@ void gnttab_free_auto_xlat_frames(void);
 
 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
 
+int gnttab_alloc_pages(int nr_pages, struct page **pages);
+void gnttab_free_pages(int nr_pages, struct page **pages);
+
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
-- 
cgit v1.2.3-59-g8ed1b


From 8da7633f168b5428e2cfb7342408b2c44088f5df Mon Sep 17 00:00:00 2001
From: Jennifer Herbert
Date: Wed, 24 Dec 2014 14:17:06 +0000
Subject: xen: mark grant mapped pages as foreign

Use the "foreign" page flag to mark pages that have a grant map.  Use
page->private to store information about the grant (the granting
domain and the grant reference).

Signed-off-by: Jennifer Herbert
Reviewed-by: Stefano Stabellini
Signed-off-by: David Vrabel
---
 arch/x86/xen/p2m.c        |  7 -------
 drivers/xen/grant-table.c | 43 +++++++++++++++++++++++++++++++++++++++++--
 include/xen/grant_table.h | 20 ++++++++++++++++++++
 3 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index a8691cb08420..f18fd1d411f6 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -681,12 +681,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 		}
 		pfn = page_to_pfn(pages[i]);
 
-		WARN_ON(PagePrivate(pages[i]));
 		WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");
 
-		SetPagePrivate(pages[i]);
-		set_page_private(pages[i], mfn);
-
 		if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
 			ret = -ENOMEM;
 			goto out;
@@ -716,9 +712,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 			goto out;
 		}
 
-		set_page_private(pages[i], INVALID_P2M_ENTRY);
-		WARN_ON(!PagePrivate(pages[i]));
-		ClearPagePrivate(pages[i]);
 		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 	}
 	if (kunmap_ops)
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index b4f93c490f83..89dcca448bb6 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -679,12 +679,27 @@ EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
  */
 int gnttab_alloc_pages(int nr_pages, struct page **pages)
 {
+	int i;
 	int ret;
 
 	ret = alloc_xenballooned_pages(nr_pages, pages, false);
 	if (ret < 0)
 		return ret;
 
+	for (i = 0; i < nr_pages; i++) {
+#if BITS_PER_LONG < 64
+		struct xen_page_foreign *foreign;
+
+		foreign = kzalloc(sizeof(*foreign), GFP_KERNEL);
+		if (!foreign) {
+			gnttab_free_pages(nr_pages, pages);
+			return -ENOMEM;
+		}
+		set_page_private(pages[i], (unsigned long)foreign);
+#endif
+		SetPagePrivate(pages[i]);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(gnttab_alloc_pages);
@@ -696,6 +711,16 @@ EXPORT_SYMBOL(gnttab_alloc_pages);
  */
 void gnttab_free_pages(int nr_pages, struct page **pages)
 {
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		if (PagePrivate(pages[i])) {
+#if BITS_PER_LONG < 64
+			kfree((void *)page_private(pages[i]));
+#endif
+			ClearPagePrivate(pages[i]);
+		}
+	}
 	free_xenballooned_pages(nr_pages, pages);
 }
 EXPORT_SYMBOL(gnttab_free_pages);
@@ -756,12 +781,22 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 	if (ret)
 		return ret;
 
-	/* Retry eagain maps */
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
+		/* Retry eagain maps */
 		if (map_ops[i].status == GNTST_eagain)
 			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
 						map_ops + i,
 						&map_ops[i].status, __func__);
 
+		if (map_ops[i].status == GNTST_okay) {
+			struct xen_page_foreign *foreign;
+
+			SetPageForeign(pages[i]);
+			foreign = xen_page_foreign(pages[i]);
+			foreign->domid = map_ops[i].dom;
+			foreign->gref = map_ops[i].ref;
+		}
+	}
+
 	return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);
 }
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
@@ -770,12 +805,16 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		      struct gnttab_unmap_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count)
 {
+	unsigned int i;
 	int ret;
 
 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
 	if (ret)
 		return ret;
 
+	for (i = 0; i < count; i++)
+		ClearPageForeign(pages[i]);
+
 	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
 
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 949803e20872..d3bef563e8da 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -45,6 +45,8 @@
 #include
 #include
+#include <linux/mm_types.h>
+#include <linux/page-flags.h>
 
 #define GNTTAB_RESERVED_XENSTORE 1
 
@@ -185,4 +187,22 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count);
 void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count);
 
+
+struct xen_page_foreign {
+	domid_t domid;
+	grant_ref_t gref;
+};
+
+static inline struct xen_page_foreign *xen_page_foreign(struct page *page)
+{
+	if (!PageForeign(page))
+		return NULL;
+#if BITS_PER_LONG < 64
+	return (struct xen_page_foreign *)page->private;
+#else
+	BUILD_BUG_ON(sizeof(struct xen_page_foreign) > BITS_PER_LONG);
+	return (struct xen_page_foreign *)&page->private;
+#endif
+}
+
 #endif /* __ASM_GNTTAB_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From 3f9f1c67572f5e5e6dc84216d48d1480f3c4fcf6 Mon Sep 17 00:00:00 2001
From: Jennifer Herbert
Date: Tue, 9 Dec 2014 18:28:37 +0000
Subject: xen/grant-table: add a mechanism to safely unmap pages that are in use

Introduce gnttab_unmap_refs_async() that can be used to safely unmap
pages that may be in use (ref count > 1).  If the pages are in use the
unmap is deferred and retried later.

This polling is not very clever but it should be good enough if the
cases where the delay is necessary are rare.

The initial delay is 5 ms and is increased linearly on each subsequent
retry (to reduce load if the page is in use for a long time).

This is needed to allow block backends using grant mapping to safely
use network storage (block or filesystem based such as iSCSI or NFS).

The network storage driver may complete a block request whilst there
is a queued network packet retry (because the ack from the remote end
races with deciding to queue the retry).  The pages for the retried
packet would be grant unmapped and the network driver (or hardware)
would access the unmapped page.
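
To illustrate the intended calling convention (this sketch is not part
of the patch; the embedding request structure and completion handler
are invented for the example), a backend that cannot guarantee its
pages are idle would hand the unmap over to the async machinery:

	static void unmap_done(int result, struct gntab_unmap_queue_data *data)
	{
		/* Runs once all pages were idle and the unmap completed. */
		if (result)
			pr_err("grant unmap failed: %d\n", result);
	}

		/* ... in the request teardown path ... */
		req->unmap_data.unmap_ops = req->unmap_ops;
		req->unmap_data.kunmap_ops = req->kunmap_ops;
		req->unmap_data.pages = req->pages;
		req->unmap_data.count = req->count;
		req->unmap_data.done = unmap_done;
		gnttab_unmap_refs_async(&req->unmap_data);

The gnttab_work and age fields are initialised by
gnttab_unmap_refs_async() itself.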
Signed-off-by: Jennifer Herbert
Acked-by: Stefano Stabellini
Signed-off-by: David Vrabel
---
 drivers/xen/grant-table.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/xen/grant_table.h | 18 ++++++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 89dcca448bb6..17972fbacddc 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -42,6 +42,7 @@
 #include
 #include
 #include
+#include <linux/workqueue.h>
 #include
 #include
 
@@ -819,6 +820,49 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
 
+#define GNTTAB_UNMAP_REFS_DELAY 5
+
+static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item);
+
+static void gnttab_unmap_work(struct work_struct *work)
+{
+	struct gntab_unmap_queue_data
+		*unmap_data = container_of(work,
+					   struct gntab_unmap_queue_data,
+					   gnttab_work.work);
+	if (unmap_data->age != UINT_MAX)
+		unmap_data->age++;
+	__gnttab_unmap_refs_async(unmap_data);
+}
+
+static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
+{
+	int ret;
+	int pc;
+
+	for (pc = 0; pc < item->count; pc++) {
+		if (page_count(item->pages[pc]) > 1) {
+			unsigned long delay = GNTTAB_UNMAP_REFS_DELAY * (item->age + 1);
+			schedule_delayed_work(&item->gnttab_work,
+					      msecs_to_jiffies(delay));
+			return;
+		}
+	}
+
+	ret = gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops,
+				item->pages, item->count);
+	item->done(ret, item);
+}
+
+void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
+{
+	INIT_DELAYED_WORK(&item->gnttab_work, gnttab_unmap_work);
+	item->age = 0;
+
+	__gnttab_unmap_refs_async(item);
+}
+EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async);
+
 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
 {
 	int rc;
 
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index d3bef563e8da..143ca5ffab7a 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -60,6 +60,22 @@ struct gnttab_free_callback {
 	u16 count;
 };
 
+struct gntab_unmap_queue_data;
+
+typedef void (*gnttab_unmap_refs_done)(int result, struct gntab_unmap_queue_data *data);
+
+struct gntab_unmap_queue_data
+{
+	struct delayed_work	gnttab_work;
+	void *data;
+	gnttab_unmap_refs_done	done;
+	struct gnttab_unmap_grant_ref *unmap_ops;
+	struct gnttab_unmap_grant_ref *kunmap_ops;
+	struct page **pages;
+	unsigned int count;
+	unsigned int age;
+};
+
 int gnttab_init(void);
 int gnttab_suspend(void);
 int gnttab_resume(void);
@@ -174,6 +190,8 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		      struct gnttab_unmap_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count);
+void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item);
+
 
 /* Perform a batch of grant map/copy operations. Retry every batch slot
  * for which the hypervisor returns GNTST_eagain. This is typically due
-- 
cgit v1.2.3-59-g8ed1b


From 923b2919e2c318ee1c360a2119a14889fd0fcce4 Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Thu, 18 Dec 2014 14:56:54 +0000
Subject: xen/gntdev: mark userspace PTEs as special on x86 PV guests

In an x86 PV guest, get_user_pages_fast() on a userspace address range
containing foreign mappings does not work correctly because the M2P
lookup of the MFN from a userspace PTE may return the wrong page.

Force get_user_pages_fast() to fail on such addresses by marking the
PTEs as special.
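
(For background: the generic fast GUP walk refuses special PTEs with,
roughly,

	if (pte_special(pte))
		return 0;	/* force fallback to the slow path */

so the slow get_user_pages() path, which can consult the VMA, is always
used for these mappings.  The exact check is architecture-specific;
this is a simplified sketch, not code from this patch.)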
If Xen has XENFEAT_gnttab_map_avail_bits (available since at least
4.0), we can do so efficiently in the grant map hypercall.  Otherwise,
it needs to be done afterwards.  This is both inefficient and racy
(the mapping is visible to the task before we fix up the PTEs), but
will be fine for well-behaved applications that do not use the mapping
until after the mmap() system call returns.

Guests with XENFEAT_auto_translated_physmap (ARM and x86 HVM or PVH)
do not need this since get_user_pages() has always worked correctly
for them.

Signed-off-by: David Vrabel
Reviewed-by: Stefano Stabellini
---
 drivers/xen/gntdev.c                | 34 ++++++++++++++++++++++++++++++++++
 include/xen/interface/features.h    |  6 ++++++
 include/xen/interface/grant_table.h |  7 +++++++
 3 files changed, 47 insertions(+)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index bccc54a80559..20c65771017d 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -244,6 +244,14 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
 	BUG_ON(pgnr >= map->count);
 	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
 
+	/*
+	 * Set the PTE as special to force get_user_pages_fast() fall
+	 * back to the slow path.  If this is not supported as part of
+	 * the grant map, it will be done afterwards.
+	 */
+	if (xen_feature(XENFEAT_gnttab_map_avail_bits))
+		flags |= (1 << _GNTMAP_guest_avail0);
+
 	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
 			  map->grants[pgnr].ref,
 			  map->grants[pgnr].domid);
@@ -252,6 +260,15 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
 	return 0;
 }
 
+#ifdef CONFIG_X86
+static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token,
+				     unsigned long addr, void *data)
+{
+	set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
+	return 0;
+}
+#endif
+
 static int map_grant_pages(struct grant_map *map)
 {
 	int i, err = 0;
@@ -840,6 +857,23 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 			if (err)
 				goto out_put_map;
 		}
+	} else {
+#ifdef CONFIG_X86
+		/*
+		 * If the PTEs were not made special by the grant map
+		 * hypercall, do so here.
+		 *
+		 * This is racy since the mapping is already visible
+		 * to userspace but userspace should be well-behaved
+		 * enough to not touch it until the mmap() call
+		 * returns.
+		 */
+		if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) {
+			apply_to_page_range(vma->vm_mm, vma->vm_start,
+					vma->vm_end - vma->vm_start,
+					set_grant_ptes_as_special, NULL);
+		}
+#endif
 	}
 
 	return 0;
 
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index 131a6ccdba25..6ad3d110bb81 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -41,6 +41,12 @@
 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
 #define XENFEAT_mmu_pt_update_preserve_ad  5
 
+/*
+ * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
+ * available pte bits.
+ */
+#define XENFEAT_gnttab_map_avail_bits      7
+
 /* x86: Does this Xen host support the HVM callback vector type? */
 #define XENFEAT_hvm_callback_vector        8
 
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index bcce56439d64..56806bc90c2f 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -525,6 +525,13 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
 #define _GNTMAP_contains_pte    (4)
 #define GNTMAP_contains_pte     (1<<_GNTMAP_contains_pte)
 
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0    (16)
+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
+
 /*
  * Values for error status returns. All errors are -ve.
  */
-- 
cgit v1.2.3-59-g8ed1b
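
Taken together, these patches let a subsystem both mark its special
mappings and resolve them again.  As a closing illustration (a
hypothetical sketch, not one of the commits above), a gntdev-style
driver could satisfy the find_special_page hook from the first patch
by indexing into the pages it grant mapped:

	static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
							 unsigned long addr)
	{
		struct grant_map *map = vma->vm_private_data;

		/* pages_vm_start is assumed bookkeeping: vma->vm_start
		 * recorded at mmap() time. */
		return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT];
	}

	static const struct vm_operations_struct gntdev_vmops = {
		/* ... open/close ... */
		.find_special_page = gntdev_vma_find_special_page,
	};

With this in place, get_user_pages() on such a VMA returns the local
grant-mapped page instead of whatever the M2P lookup would produce.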