From d79647aea22732f39c81bbdc80931f96b46023f0 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 7 Mar 2011 15:18:57 -0500 Subject: xen/gntdev,gntalloc: Remove unneeded VM flags The only time when granted pages need to be treated specially is when using Xen's PTE modification for grant mappings owned by another domain (that is, only gntdev on PV guests). Otherwise, the area does not require VM_DONTCOPY and VM_PFNMAP, since it can be accessed just like any other page of RAM. Since the vm_operations_struct close operations decrement reference counts, a corresponding open function that increments them is required now that it is possible to have multiple references to a single area. We are careful in the gntdev to check if we can remove those flags. The reason that we need to be careful in gntdev on PV guests is because we are not changing the PFN/MFN mapping on PV; instead, we change the application's page tables to point to the other domain's memory. This means that the vma cannot be copied without using another grant mapping hypercall; it also requires special handling on unmap, which is the reason for gntdev's dependency on the MMU notifier. For gntalloc, this is not a concern - the pages are owned by the domain using the gntalloc device, and can be mapped and unmapped in the same manner as any other page of memory. Acked-by: Ian Campbell Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk [v2: Added in git commit "We are.." from email correspondence] --- drivers/xen/gntalloc.c | 14 ++++++++++++-- drivers/xen/gntdev.c | 16 ++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index a7ffdfe19fc9..f6832f46aea4 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -427,6 +427,17 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd, return 0; } +static void gntalloc_vma_open(struct vm_area_struct *vma) +{ + struct gntalloc_gref *gref = vma->vm_private_data; + if (!gref) + return; + + spin_lock(&gref_lock); + gref->users++; + spin_unlock(&gref_lock); +} + static void gntalloc_vma_close(struct vm_area_struct *vma) { struct gntalloc_gref *gref = vma->vm_private_data; @@ -441,6 +452,7 @@ static void gntalloc_vma_close(struct vm_area_struct *vma) } static struct vm_operations_struct gntalloc_vmops = { + .open = gntalloc_vma_open, .close = gntalloc_vma_close, }; @@ -471,8 +483,6 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_private_data = gref; vma->vm_flags |= VM_RESERVED; - vma->vm_flags |= VM_DONTCOPY; - vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; vma->vm_ops = &gntalloc_vmops; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index d96d311b858e..687761f65e5c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -358,17 +358,26 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) /* ------------------------------------------------------------------ */ +static void gntdev_vma_open(struct vm_area_struct *vma) +{ + struct grant_map *map = vma->vm_private_data; + + pr_debug("gntdev_vma_open %p\n", vma); + atomic_inc(&map->users); +} + static void gntdev_vma_close(struct vm_area_struct *vma) { struct grant_map *map = vma->vm_private_data; - pr_debug("close %p\n", vma); + pr_debug("gntdev_vma_close %p\n", vma); map->vma = NULL; vma->vm_private_data = NULL; gntdev_put_map(map); } static struct vm_operations_struct gntdev_vmops = { + .open = gntdev_vma_open, .close = gntdev_vma_close, }; @@ -680,7 +689,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; + vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND; + + if (use_ptemod) + vma->vm_flags |= VM_DONTCOPY|VM_PFNMAP; vma->vm_private_data = map; -- cgit v1.2.3-59-g8ed1b From beafbdc1df02877612dc9039c1de0639921fddec Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 11:17:36 -0400 Subject: xen/irq: Check if the PCI device is owned by a domain different than DOMID_SELF. We check if there is a domain owner for the PCI device. In case of failure (meaning no domain has registered for this device) we make DOMID_SELF the owner. Signed-off-by: Konrad Rzeszutek Wilk [v2: deal with rebasing on v2.6.37-1] [v3: deal with rebasing on stable/irq.cleanup] [v4: deal with rebasing on stable/irq.ween_of_nr_irqs] [v5: deal with rebasing on v2.6.39-rc3] Signed-off-by: Jeremy Fitzhardinge Acked-by: Xiantao Zhang --- arch/x86/pci/xen.c | 21 ++++++++++++++++----- drivers/xen/events.c | 12 ++++++++---- include/xen/events.h | 3 ++- 3 files changed, 26 insertions(+), 10 deletions(-) (limited to 'drivers/xen') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 6075f2d65335..393981feb12f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, (type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi"); + "msi-x" : "msi", + DOMID_SELF); if (irq < 0) goto error; dev_dbg(&dev->dev, @@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, (type == PCI_CAP_ID_MSIX) ? "pcifront-msi-x" : - "pcifront-msi"); + "pcifront-msi", + DOMID_SELF); if (irq < 0) goto free; i++; @@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) list_for_each_entry(msidesc, &dev->msi_list, list) { struct physdev_map_pirq map_irq; + domid_t domid; + + domid = ret = xen_find_device_domain_owner(dev); + /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED, + * hence check ret value for < 0. */ + if (ret < 0) + domid = DOMID_SELF; memset(&map_irq, 0, sizeof(map_irq)); - map_irq.domid = DOMID_SELF; + map_irq.domid = domid; map_irq.type = MAP_PIRQ_TYPE_MSI; map_irq.index = -1; map_irq.pirq = -1; @@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); if (ret) { - dev_warn(&dev->dev, "xen map irq failed %d\n", ret); + dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n", + ret, domid); goto out; } ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq, map_irq.index, (type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi"); + "msi-x" : "msi", + domid); if (ret < 0) goto out; } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 42d6c930cc87..ac0e22826357 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -101,6 +101,7 @@ struct irq_info unsigned short gsi; unsigned char vector; unsigned char flags; + uint16_t domid; } pirq; } u; }; @@ -184,6 +185,7 @@ static void xen_irq_info_pirq_init(unsigned irq, unsigned short pirq, unsigned short gsi, unsigned short vector, + uint16_t domid, unsigned char flags) { struct irq_info *info = info_for_irq(irq); @@ -193,6 +195,7 @@ static void xen_irq_info_pirq_init(unsigned irq, info->u.pirq.pirq = pirq; info->u.pirq.gsi = gsi; info->u.pirq.vector = vector; + info->u.pirq.domid = domid; info->u.pirq.flags = flags; } @@ -655,7 +658,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, goto out; } - xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, + xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF, shareable ? PIRQ_SHAREABLE : 0); out: @@ -680,7 +683,8 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) } int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, - int pirq, int vector, const char *name) + int pirq, int vector, const char *name, + domid_t domid) { int irq, ret; @@ -693,7 +697,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, name); - xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0); + xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0); ret = irq_set_msi_desc(irq, msidesc); if (ret < 0) goto error_irq; @@ -722,7 +726,7 @@ int xen_destroy_irq(int irq) if (xen_initial_domain()) { unmap_irq.pirq = info->u.pirq.pirq; - unmap_irq.domid = DOMID_SELF; + unmap_irq.domid = info->u.pirq.domid; rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); if (rc) { printk(KERN_WARNING "unmap irq failed %d\n", rc); diff --git a/include/xen/events.h b/include/xen/events.h index f1b87ad48ac7..9aecc0b5a0e6 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -85,7 +85,8 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc); /* Bind an PSI pirq to an irq. */ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, - int pirq, int vector, const char *name); + int pirq, int vector, const char *name, + domid_t domid); #endif /* De-allocates the above mentioned physical interrupt. */ -- cgit v1.2.3-59-g8ed1b From c7c2c3a28657cfdcef50c02b18ccca3761209e17 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 8 Nov 2010 14:26:36 -0500 Subject: xen/irq: Add support to check if IRQ line is shared with other domains. We do this via the PHYSDEVOP_irq_status_query support hypervisor call. We will get a positive value if another domain has binded its PIRQ to the specified GSI (IRQ line). [v2: Deal with v2.6.37-rc1 rebase fallout] [v3: Deal with stable/irq.cleanup fallout] [v4: xen_ignore_irq->xen_test_irq_shared] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 12 ++++++++++++ include/xen/events.h | 3 +++ 2 files changed, 15 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index ac0e22826357..0ac7a149e7f2 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1508,6 +1508,18 @@ void xen_poll_irq(int irq) xen_poll_irq_timeout(irq, 0 /* no timeout */); } +/* Check whether the IRQ line is shared with other guests. */ +int xen_test_irq_shared(int irq) +{ + struct irq_info *info = info_for_irq(irq); + struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq }; + + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) + return 0; + return !(irq_status.flags & XENIRQSTAT_shared); +} +EXPORT_SYMBOL_GPL(xen_test_irq_shared); + void xen_irq_resume(void) { unsigned int cpu, evtchn; diff --git a/include/xen/events.h b/include/xen/events.h index 9aecc0b5a0e6..932e54051d3e 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -95,4 +95,7 @@ int xen_destroy_irq(int irq); /* Return irq from pirq */ int xen_irq_from_pirq(unsigned pirq); +/* Determine whether to ignore this IRQ if it is passed to a guest. */ +int xen_test_irq_shared(int irq); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3-59-g8ed1b From e6197acc726ab3baa60375a5891d58c2ee87e0f3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 24 Feb 2011 14:20:12 -0500 Subject: xen/irq: Export 'xen_pirq_from_irq' function. We need this to find the real Xen PIRQ value for a device that requests an MSI or MSI-X. In the past we used 'xen_gsi_from_irq' since that function would return an Xen PIRQ or GSI depending on the provided IRQ. Now that we have seperated that we need to use the correct function. [v2: Deal with rebase on stable/irq.cleanup] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 6 ++++++ include/xen/events.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 0ac7a149e7f2..e4e8e9a745bf 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -763,6 +763,12 @@ out: return irq; } + +int xen_pirq_from_irq(unsigned irq) +{ + return pirq_from_irq(irq); +} +EXPORT_SYMBOL_GPL(xen_pirq_from_irq); int bind_evtchn_to_irq(unsigned int evtchn) { int irq; diff --git a/include/xen/events.h b/include/xen/events.h index 932e54051d3e..9af21e19545a 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -95,6 +95,9 @@ int xen_destroy_irq(int irq); /* Return irq from pirq */ int xen_irq_from_pirq(unsigned pirq); +/* Return the pirq allocated to the irq. */ +int xen_pirq_from_irq(unsigned irq); + /* Determine whether to ignore this IRQ if it is passed to a guest. */ int xen_test_irq_shared(int irq); -- cgit v1.2.3-59-g8ed1b From 1eff1ad0285038e309a81da4a004f071608309fb Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 16 Feb 2011 16:26:44 -0500 Subject: xen/irq: The Xen hypervisor cleans up the PIRQs if the other domain forgot. And if the other domain forgot to clean up its PIRQs we don't need to fail the operation. Just take a note of it and continue on. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index e4e8e9a745bf..e51f3c50bd40 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -728,7 +728,14 @@ int xen_destroy_irq(int irq) unmap_irq.pirq = info->u.pirq.pirq; unmap_irq.domid = info->u.pirq.domid; rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); - if (rc) { + /* If another domain quits without making the pci_disable_msix + * call, the Xen hypervisor takes care of freeing the PIRQs + * (free_domain_pirqs). + */ + if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF)) + printk(KERN_INFO "domain %d does not have %d anymore\n", + info->u.pirq.domid, info->u.pirq.pirq); + else if (rc) { printk(KERN_WARNING "unmap irq failed %d\n", rc); goto out; } -- cgit v1.2.3-59-g8ed1b From cf8d91633ddef9e816ccbf3da833c79ce508988d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 28 Feb 2011 17:58:48 -0500 Subject: xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override. We only supported the M2P (and P2M) override only for the GNTMAP_contains_pte type mappings. Meaning that we grants operations would "contain the machine address of the PTE to update" If the flag is unset, then the grant operation is "contains a host virtual address". The latter case means that the Hypervisor takes care of updating our page table (specifically the PTE entry) with the guest's MFN. As such we should not try to do anything with the PTE. Previous to this patch we would try to clear the PTE which resulted in Xen hypervisor being upset with us: (XEN) mm.c:1066:d0 Attempt to implicitly unmap a granted PTE c0100000ccc59067 (XEN) domain_crash called from mm.c:1067 (XEN) Domain 0 (vcpu#0) crashed on cpu#3: (XEN) ----[ Xen-4.0-110228 x86_64 debug=y Not tainted ]---- and crashing us. This patch allows us to inhibit the PTE clearing in the PV guest if the GNTMAP_contains_pte is not set. On the m2p_remove_override path we provide the same parameter. Sadly in the grant-table driver we do not have a mechanism to tell m2p_remove_override whether to clear the PTE or not. Since the grant-table driver is used by user-space, we can safely assume that it operates only on PTE's. Hence the implementation for it to work on !GNTMAP_contains_pte returns -EOPNOTSUPP. In the future we can implement the support for this. It will require some extra accounting structure to keep track of the page[i], and the flag. [v1: Added documentation details, made it return -EOPNOTSUPP instead of trying to do a half-way implementation] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 5 +++-- arch/x86/xen/p2m.c | 10 ++++------ drivers/xen/grant-table.c | 31 ++++++++++++++++++++++++------- 3 files changed, 31 insertions(+), 15 deletions(-) (limited to 'drivers/xen') diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c61934fbf22a..64a619d47d34 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern unsigned long set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e); -extern int m2p_add_override(unsigned long mfn, struct page *page); -extern int m2p_remove_override(struct page *page); +extern int m2p_add_override(unsigned long mfn, struct page *page, + bool clear_pte); +extern int m2p_remove_override(struct page *page, bool clear_pte); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 141eb0de8b06..2d2b32af3a1d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -650,7 +650,7 @@ static unsigned long mfn_hash(unsigned long mfn) } /* Add an MFN override for a particular page */ -int m2p_add_override(unsigned long mfn, struct page *page) +int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) { unsigned long flags; unsigned long pfn; @@ -662,7 +662,6 @@ int m2p_add_override(unsigned long mfn, struct page *page) if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); ptep = lookup_address(address, &level); - if (WARN(ptep == NULL || level != PG_LEVEL_4K, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; @@ -674,10 +673,9 @@ int m2p_add_override(unsigned long mfn, struct page *page) if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) return -ENOMEM; - if (!PageHighMem(page)) + if (clear_pte && !PageHighMem(page)) /* Just zap old mapping for now */ pte_clear(&init_mm, address, ptep); - spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); @@ -685,7 +683,7 @@ int m2p_add_override(unsigned long mfn, struct page *page) return 0; } -int m2p_remove_override(struct page *page) +int m2p_remove_override(struct page *page, bool clear_pte) { unsigned long flags; unsigned long mfn; @@ -713,7 +711,7 @@ int m2p_remove_override(struct page *page) spin_unlock_irqrestore(&m2p_override_lock, flags); set_phys_to_machine(pfn, page->index); - if (!PageHighMem(page)) + if (clear_pte && !PageHighMem(page)) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); /* No tlb flush necessary because the caller already diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 3745a318defc..fd725cde6ad1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -466,13 +466,30 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (map_ops[i].status) continue; - /* m2p override only supported for GNTMAP_contains_pte mappings */ - if (!(map_ops[i].flags & GNTMAP_contains_pte)) - continue; - pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + + if (map_ops[i].flags & GNTMAP_contains_pte) { + pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + (map_ops[i].host_addr & ~PAGE_MASK)); - mfn = pte_mfn(*pte); - ret = m2p_add_override(mfn, pages[i]); + mfn = pte_mfn(*pte); + } else { + /* If you really wanted to do this: + * mfn = PFN_DOWN(map_ops[i].dev_bus_addr); + * + * The reason we do not implement it is b/c on the + * unmap path (gnttab_unmap_refs) we have no means of + * checking whether the page is !GNTMAP_contains_pte. + * + * That is without some extra data-structure to carry + * the struct page, bool clear_pte, and list_head next + * tuples and deal with allocation/delallocation, etc. + * + * The users of this API set the GNTMAP_contains_pte + * flag so lets just return not supported until it + * becomes neccessary to implement. + */ + return -EOPNOTSUPP; + } + ret = m2p_add_override(mfn, pages[i], + map_ops[i].flags & GNTMAP_contains_pte); if (ret) return ret; } @@ -494,7 +511,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, return ret; for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i]); + ret = m2p_remove_override(pages[i], true /* clear the PTE */); if (ret) return ret; } -- cgit v1.2.3-59-g8ed1b