diff options
Diffstat (limited to 'drivers/xen/xenbus')
-rw-r--r-- | drivers/xen/xenbus/xenbus.h | 4 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 424 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_comms.c | 8 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_dev_frontend.c | 4 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 278 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_backend.c | 10 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 21 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 34 |
8 files changed, 509 insertions, 274 deletions
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 5f5b8a7d5b80..2754bdfadcb8 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -44,6 +44,8 @@ struct xen_bus_type { int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*probe)(struct xen_bus_type *bus, const char *type, const char *dir); + bool (*otherend_will_handle)(struct xenbus_watch *watch, + const char *path, const char *token); void (*otherend_changed)(struct xenbus_watch *watch, const char *path, const char *token); struct bus_type bus; @@ -104,7 +106,7 @@ void xs_request_exit(struct xb_req_data *req); int xenbus_match(struct device *_dev, struct device_driver *_drv); int xenbus_dev_probe(struct device *_dev); -int xenbus_dev_remove(struct device *_dev); +void xenbus_dev_remove(struct device *_dev); int xenbus_register_driver_common(struct xenbus_driver *drv, struct xen_bus_type *bus, struct module *owner, diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index e17ca8156171..d4b251925796 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -69,11 +69,24 @@ struct xenbus_map_node { unsigned int nr_handles; }; +struct map_ring_valloc { + struct xenbus_map_node *node; + + /* Why do we need two arrays? See comment of __xenbus_map_ring */ + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; + + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; + + unsigned int idx; +}; + static DEFINE_SPINLOCK(xenbus_valloc_lock); static LIST_HEAD(xenbus_valloc_pages); struct xenbus_ring_ops { - int (*map)(struct xenbus_device *dev, + int (*map)(struct xenbus_device *dev, struct map_ring_valloc *info, grant_ref_t *gnt_refs, unsigned int nr_grefs, void **vaddr); int (*unmap)(struct xenbus_device *dev, void *vaddr); @@ -114,18 +127,22 @@ EXPORT_SYMBOL_GPL(xenbus_strstate); */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *)) { int err; watch->node = path; + watch->will_handle = will_handle; watch->callback = callback; err = register_xenbus_watch(watch); if (err) { watch->node = NULL; + watch->will_handle = NULL; watch->callback = NULL; xenbus_dev_fatal(dev, err, "adding watch on %s", path); } @@ -152,6 +169,8 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); */ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *), const char *pathfmt, ...) @@ -168,7 +187,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); return -ENOMEM; } - err = xenbus_watch_path(dev, path, watch, callback); + err = xenbus_watch_path(dev, path, watch, will_handle, callback); if (err) kfree(path); @@ -344,46 +363,95 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, __xenbus_switch_state(dev, XenbusStateClosing, 1); } -/** - * xenbus_grant_ring +/* + * xenbus_setup_ring * @dev: xenbus device - * @vaddr: starting virtual address of the ring + * @vaddr: pointer to starting virtual address of the ring * @nr_pages: number of pages to be granted * @grefs: grant reference array to be filled in * - * Grant access to the given @vaddr to the peer of the given device. - * Then fill in @grefs with grant references. Return 0 on success, or - * -errno on error. On error, the device will switch to - * XenbusStateClosing, and the error will be saved in the store. + * Allocate physically contiguous pages for a shared ring buffer and grant it + * to the peer of the given device. The ring buffer is initially filled with + * zeroes. The virtual address of the ring is stored at @vaddr and the + * grant references are stored in the @grefs array. In case of error @vaddr + * will be set to NULL and @grefs will be filled with INVALID_GRANT_REF. */ -int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, +int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr, unsigned int nr_pages, grant_ref_t *grefs) { - int err; - int i, j; + unsigned long ring_size = nr_pages * XEN_PAGE_SIZE; + grant_ref_t gref_head; + unsigned int i; + void *addr; + int ret; + + addr = *vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO); + if (!*vaddr) { + ret = -ENOMEM; + goto err; + } + + ret = gnttab_alloc_grant_references(nr_pages, &gref_head); + if (ret) { + xenbus_dev_fatal(dev, ret, "granting access to %u ring pages", + nr_pages); + goto err; + } for (i = 0; i < nr_pages; i++) { - err = gnttab_grant_foreign_access(dev->otherend_id, - virt_to_gfn(vaddr), 0); - if (err < 0) { - xenbus_dev_fatal(dev, err, - "granting access to ring page"); - goto fail; - } - grefs[i] = err; + unsigned long gfn; - vaddr = vaddr + XEN_PAGE_SIZE; + if (is_vmalloc_addr(*vaddr)) + gfn = pfn_to_gfn(vmalloc_to_pfn(addr)); + else + gfn = virt_to_gfn(addr); + + grefs[i] = gnttab_claim_grant_reference(&gref_head); + gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id, + gfn, 0); + + addr += XEN_PAGE_SIZE; } return 0; -fail: - for (j = 0; j < i; j++) - gnttab_end_foreign_access_ref(grefs[j], 0); - return err; + err: + if (*vaddr) + free_pages_exact(*vaddr, ring_size); + for (i = 0; i < nr_pages; i++) + grefs[i] = INVALID_GRANT_REF; + *vaddr = NULL; + + return ret; } -EXPORT_SYMBOL_GPL(xenbus_grant_ring); +EXPORT_SYMBOL_GPL(xenbus_setup_ring); +/* + * xenbus_teardown_ring + * @vaddr: starting virtual address of the ring + * @nr_pages: number of pages + * @grefs: grant reference array + * + * Remove grants for the shared ring buffer and free the associated memory. + * On return the grant reference array is filled with INVALID_GRANT_REF. + */ +void xenbus_teardown_ring(void **vaddr, unsigned int nr_pages, + grant_ref_t *grefs) +{ + unsigned int i; + + for (i = 0; i < nr_pages; i++) { + if (grefs[i] != INVALID_GRANT_REF) { + gnttab_end_foreign_access(grefs[i], NULL); + grefs[i] = INVALID_GRANT_REF; + } + } + + if (*vaddr) + free_pages_exact(*vaddr, nr_pages * XEN_PAGE_SIZE); + *vaddr = NULL; +} +EXPORT_SYMBOL_GPL(xenbus_teardown_ring); /** * Allocate an event channel for the given xenbus_device, assigning the newly @@ -391,7 +459,7 @@ EXPORT_SYMBOL_GPL(xenbus_grant_ring); * error, the device will switch to XenbusStateClosing, and the error will be * saved in the store. */ -int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) +int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port) { struct evtchn_alloc_unbound alloc_unbound; int err; @@ -414,7 +482,7 @@ EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); /** * Free an existing event channel. Returns 0 on success or -errno on error. */ -int xenbus_free_evtchn(struct xenbus_device *dev, int port) +int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port) { struct evtchn_close close; int err; @@ -423,7 +491,7 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port) err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); if (err) - xenbus_dev_error(dev, err, "freeing event channel %d", port); + xenbus_dev_error(dev, err, "freeing event channel %u", port); return err; } @@ -440,15 +508,34 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); * Map @nr_grefs pages of memory into this domain from another * domain's grant table. xenbus_map_ring_valloc allocates @nr_grefs * pages of virtual address space, maps the pages to that address, and - * sets *vaddr to that address. Returns 0 on success, and GNTST_* - * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on + * sets *vaddr to that address. Returns 0 on success, and -errno on * error. If an error is returned, device will switch to * XenbusStateClosing and the error message will be saved in XenStore. */ int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, unsigned int nr_grefs, void **vaddr) { - return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr); + int err; + struct map_ring_valloc *info; + + *vaddr = NULL; + + if (nr_grefs > XENBUS_MAX_RING_GRANTS) + return -EINVAL; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->node = kzalloc(sizeof(*info->node), GFP_KERNEL); + if (!info->node) + err = -ENOMEM; + else + err = ring_ops->map(dev, info, gnt_refs, nr_grefs, vaddr); + + kfree(info->node); + kfree(info); + return err; } EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); @@ -459,79 +546,105 @@ static int __xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs, unsigned int nr_grefs, grant_handle_t *handles, - phys_addr_t *addrs, + struct map_ring_valloc *info, unsigned int flags, bool *leaked) { - struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS]; - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; int i, j; - int err = GNTST_okay; if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; for (i = 0; i < nr_grefs; i++) { - memset(&map[i], 0, sizeof(map[i])); - gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i], - dev->otherend_id); + gnttab_set_map_op(&info->map[i], info->phys_addrs[i], flags, + gnt_refs[i], dev->otherend_id); handles[i] = INVALID_GRANT_HANDLE; } - gnttab_batch_map(map, i); + gnttab_batch_map(info->map, i); for (i = 0; i < nr_grefs; i++) { - if (map[i].status != GNTST_okay) { - err = map[i].status; - xenbus_dev_fatal(dev, map[i].status, + if (info->map[i].status != GNTST_okay) { + xenbus_dev_fatal(dev, info->map[i].status, "mapping in shared page %d from domain %d", gnt_refs[i], dev->otherend_id); goto fail; } else - handles[i] = map[i].handle; + handles[i] = info->map[i].handle; } - return GNTST_okay; + return 0; fail: for (i = j = 0; i < nr_grefs; i++) { if (handles[i] != INVALID_GRANT_HANDLE) { - memset(&unmap[j], 0, sizeof(unmap[j])); - gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i], + gnttab_set_unmap_op(&info->unmap[j], + info->phys_addrs[i], GNTMAP_host_map, handles[i]); j++; } } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j)) - BUG(); + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j)); *leaked = false; for (i = 0; i < j; i++) { - if (unmap[i].status != GNTST_okay) { + if (info->unmap[i].status != GNTST_okay) { *leaked = true; break; } } - return err; + return -ENOENT; } -struct map_ring_valloc_hvm +/** + * xenbus_unmap_ring + * @dev: xenbus device + * @handles: grant handle array + * @nr_handles: number of handles in the array + * @vaddrs: addresses to unmap + * + * Unmap memory in this domain that was imported from another domain. + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles, + unsigned int nr_handles, unsigned long *vaddrs) { - unsigned int idx; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; + int i; + int err; - /* Why do we need two arrays? See comment of __xenbus_map_ring */ - phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; - unsigned long addrs[XENBUS_MAX_RING_GRANTS]; -}; + if (nr_handles > XENBUS_MAX_RING_GRANTS) + return -EINVAL; + + for (i = 0; i < nr_handles; i++) + gnttab_set_unmap_op(&unmap[i], vaddrs[i], + GNTMAP_host_map, handles[i]); + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)); + + err = GNTST_okay; + for (i = 0; i < nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } + + return err; +} static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn, unsigned int goffset, unsigned int len, void *data) { - struct map_ring_valloc_hvm *info = data; + struct map_ring_valloc *info = data; unsigned long vaddr = (unsigned long)gfn_to_virt(gfn); info->phys_addrs[info->idx] = vaddr; @@ -540,39 +653,28 @@ static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn, info->idx++; } -static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, - grant_ref_t *gnt_ref, - unsigned int nr_grefs, - void **vaddr) +static int xenbus_map_ring_hvm(struct xenbus_device *dev, + struct map_ring_valloc *info, + grant_ref_t *gnt_ref, + unsigned int nr_grefs, + void **vaddr) { - struct xenbus_map_node *node; + struct xenbus_map_node *node = info->node; int err; void *addr; bool leaked = false; - struct map_ring_valloc_hvm info = { - .idx = 0, - }; unsigned int nr_pages = XENBUS_PAGES(nr_grefs); - if (nr_grefs > XENBUS_MAX_RING_GRANTS) - return -EINVAL; - - *vaddr = NULL; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; - - err = alloc_xenballooned_pages(nr_pages, node->hvm.pages); + err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages); if (err) goto out_err; gnttab_foreach_grant(node->hvm.pages, nr_grefs, xenbus_map_ring_setup_grant_hvm, - &info); + info); err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles, - info.phys_addrs, GNTMAP_host_map, &leaked); + info, GNTMAP_host_map, &leaked); node->nr_handles = nr_grefs; if (err) @@ -592,61 +694,23 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, spin_unlock(&xenbus_valloc_lock); *vaddr = addr; + info->node = NULL; + return 0; out_xenbus_unmap_ring: if (!leaked) - xenbus_unmap_ring(dev, node->handles, nr_grefs, info.addrs); + xenbus_unmap_ring(dev, node->handles, nr_grefs, info->addrs); else pr_alert("leaking %p size %u page(s)", addr, nr_pages); out_free_ballooned_pages: if (!leaked) - free_xenballooned_pages(nr_pages, node->hvm.pages); + xen_free_unpopulated_pages(nr_pages, node->hvm.pages); out_err: - kfree(node); return err; } - -/** - * xenbus_map_ring - * @dev: xenbus device - * @gnt_refs: grant reference array - * @nr_grefs: number of grant reference - * @handles: pointer to grant handle to be filled - * @vaddrs: addresses to be mapped to - * @leaked: fail to clean up a failed map, caller should not free vaddr - * - * Map pages of memory into this domain from another domain's grant table. - * xenbus_map_ring does not allocate the virtual address space (you must do - * this yourself!). It only maps in the pages to the specified address. - * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) - * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch to - * XenbusStateClosing and the first error message will be saved in XenStore. - * Further more if we fail to map the ring, caller should check @leaked. - * If @leaked is not zero it means xenbus_map_ring fails to clean up, caller - * should not free the address space of @vaddr. - */ -int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs, - unsigned int nr_grefs, grant_handle_t *handles, - unsigned long *vaddrs, bool *leaked) -{ - phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; - int i; - - if (nr_grefs > XENBUS_MAX_RING_GRANTS) - return -EINVAL; - - for (i = 0; i < nr_grefs; i++) - phys_addrs[i] = (unsigned long)vaddrs[i]; - - return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles, - phys_addrs, GNTMAP_host_map, leaked); -} -EXPORT_SYMBOL_GPL(xenbus_map_ring); - - /** * xenbus_unmap_ring_vfree * @dev: xenbus device @@ -666,40 +730,33 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); #ifdef CONFIG_XEN_PV -static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, - grant_ref_t *gnt_refs, - unsigned int nr_grefs, - void **vaddr) +static int map_ring_apply(pte_t *pte, unsigned long addr, void *data) { - struct xenbus_map_node *node; - struct vm_struct *area; - pte_t *ptes[XENBUS_MAX_RING_GRANTS]; - phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; - int err = GNTST_okay; - int i; - bool leaked; - - *vaddr = NULL; + struct map_ring_valloc *info = data; - if (nr_grefs > XENBUS_MAX_RING_GRANTS) - return -EINVAL; + info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr; + return 0; +} - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; +static int xenbus_map_ring_pv(struct xenbus_device *dev, + struct map_ring_valloc *info, + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + void **vaddr) +{ + struct xenbus_map_node *node = info->node; + struct vm_struct *area; + bool leaked = false; + int err = -ENOMEM; - area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes); - if (!area) { - kfree(node); + area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP); + if (!area) return -ENOMEM; - } - - for (i = 0; i < nr_grefs; i++) - phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr; - + if (apply_to_page_range(&init_mm, (unsigned long)area->addr, + XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info)) + goto failed; err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles, - phys_addrs, - GNTMAP_host_map | GNTMAP_contains_pte, + info, GNTMAP_host_map | GNTMAP_contains_pte, &leaked); if (err) goto failed; @@ -712,6 +769,8 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, spin_unlock(&xenbus_valloc_lock); *vaddr = area->addr; + info->node = NULL; + return 0; failed: @@ -720,11 +779,10 @@ failed: else pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs); - kfree(node); return err; } -static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) +static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; @@ -761,8 +819,7 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) unmap[i].handle = node->handles[i]; } - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) - BUG(); + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)); err = GNTST_okay; leaked = false; @@ -788,12 +845,12 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) } static const struct xenbus_ring_ops ring_ops_pv = { - .map = xenbus_map_ring_valloc_pv, - .unmap = xenbus_unmap_ring_vfree_pv, + .map = xenbus_map_ring_pv, + .unmap = xenbus_unmap_ring_pv, }; #endif -struct unmap_ring_vfree_hvm +struct unmap_ring_hvm { unsigned int idx; unsigned long addrs[XENBUS_MAX_RING_GRANTS]; @@ -804,19 +861,19 @@ static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn, unsigned int len, void *data) { - struct unmap_ring_vfree_hvm *info = data; + struct unmap_ring_hvm *info = data; info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn); info->idx++; } -static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) +static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr) { int rv; struct xenbus_map_node *node; void *addr; - struct unmap_ring_vfree_hvm info = { + struct unmap_ring_hvm info = { .idx = 0, }; unsigned int nr_pages; @@ -849,7 +906,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) info.addrs); if (!rv) { vunmap(vaddr); - free_xenballooned_pages(nr_pages, node->hvm.pages); + xen_free_unpopulated_pages(nr_pages, node->hvm.pages); } else WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages); @@ -859,51 +916,6 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) } /** - * xenbus_unmap_ring - * @dev: xenbus device - * @handles: grant handle array - * @nr_handles: number of handles in the array - * @vaddrs: addresses to unmap - * - * Unmap memory in this domain that was imported from another domain. - * Returns 0 on success and returns GNTST_* on error - * (see xen/include/interface/grant_table.h). - */ -int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t *handles, unsigned int nr_handles, - unsigned long *vaddrs) -{ - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; - int i; - int err; - - if (nr_handles > XENBUS_MAX_RING_GRANTS) - return -EINVAL; - - for (i = 0; i < nr_handles; i++) - gnttab_set_unmap_op(&unmap[i], vaddrs[i], - GNTMAP_host_map, handles[i]); - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) - BUG(); - - err = GNTST_okay; - for (i = 0; i < nr_handles; i++) { - if (unmap[i].status != GNTST_okay) { - xenbus_dev_error(dev, unmap[i].status, - "unmapping page at handle %d error %d", - handles[i], unmap[i].status); - err = unmap[i].status; - break; - } - } - - return err; -} -EXPORT_SYMBOL_GPL(xenbus_unmap_ring); - - -/** * xenbus_read_driver_state * @path: path for driver * @@ -922,8 +934,8 @@ enum xenbus_state xenbus_read_driver_state(const char *path) EXPORT_SYMBOL_GPL(xenbus_read_driver_state); static const struct xenbus_ring_ops ring_ops_hvm = { - .map = xenbus_map_ring_valloc_hvm, - .unmap = xenbus_unmap_ring_vfree_hvm, + .map = xenbus_map_ring_hvm, + .unmap = xenbus_unmap_ring_hvm, }; void __init xenbus_ring_ops_init(void) diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index eb5151fc8efa..e5fda0256feb 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -57,16 +57,8 @@ DEFINE_MUTEX(xs_response_mutex); static int xenbus_irq; static struct task_struct *xenbus_task; -static DECLARE_WORK(probe_work, xenbus_probe); - - static irqreturn_t wake_waiting(int irq, void *unused) { - if (unlikely(xenstored_ready == 0)) { - xenstored_ready = 1; - schedule_work(&probe_work); - } - wake_up(&xb_waitq); return IRQ_HANDLED; } diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 597af455a522..0792fda49a15 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -128,7 +128,7 @@ static ssize_t xenbus_file_read(struct file *filp, { struct xenbus_file_priv *u = filp->private_data; struct read_buffer *rb; - unsigned i; + ssize_t i; int ret; mutex_lock(&u->reply_mutex); @@ -148,7 +148,7 @@ again: rb = list_entry(u->read_buffers.next, struct read_buffer, list); i = 0; while (i < len) { - unsigned sz = min((unsigned)len - i, rb->len - rb->cons); + size_t sz = min_t(size_t, len - i, rb->len - rb->cons); ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 8c4d05b687b7..58b732dcbfb8 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -31,6 +31,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define dev_fmt pr_fmt #define DPRINTK(fmt, args...) \ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ @@ -51,7 +52,6 @@ #include <linux/module.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/xen/hypervisor.h> #include <xen/xen.h> @@ -65,6 +65,7 @@ #include "xenbus.h" +static int xs_init_irq; int xen_store_evtchn; EXPORT_SYMBOL_GPL(xen_store_evtchn); @@ -136,6 +137,7 @@ static int watch_otherend(struct xenbus_device *dev) container_of(dev->dev.bus, struct xen_bus_type, bus); return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_will_handle, bus->otherend_changed, "%s/%s", dev->otherend, "state"); } @@ -205,6 +207,64 @@ void xenbus_otherend_changed(struct xenbus_watch *watch, } EXPORT_SYMBOL_GPL(xenbus_otherend_changed); +#define XENBUS_SHOW_STAT(name) \ +static ssize_t name##_show(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + \ + return sprintf(buf, "%d\n", atomic_read(&dev->name)); \ +} \ +static DEVICE_ATTR_RO(name) + +XENBUS_SHOW_STAT(event_channels); +XENBUS_SHOW_STAT(events); +XENBUS_SHOW_STAT(spurious_events); +XENBUS_SHOW_STAT(jiffies_eoi_delayed); + +static ssize_t spurious_threshold_show(struct device *_dev, + struct device_attribute *attr, + char *buf) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + + return sprintf(buf, "%d\n", dev->spurious_threshold); +} + +static ssize_t spurious_threshold_store(struct device *_dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned int val; + ssize_t ret; + + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + + dev->spurious_threshold = val; + + return count; +} + +static DEVICE_ATTR_RW(spurious_threshold); + +static struct attribute *xenbus_attrs[] = { + &dev_attr_event_channels.attr, + &dev_attr_events.attr, + &dev_attr_spurious_events.attr, + &dev_attr_jiffies_eoi_delayed.attr, + &dev_attr_spurious_threshold.attr, + NULL +}; + +static const struct attribute_group xenbus_group = { + .name = "xenbus", + .attrs = xenbus_attrs, +}; + int xenbus_dev_probe(struct device *_dev) { struct xenbus_device *dev = to_xenbus_device(_dev); @@ -252,6 +312,11 @@ int xenbus_dev_probe(struct device *_dev) return err; } + dev->spurious_threshold = 1; + if (sysfs_create_group(&dev->dev.kobj, &xenbus_group)) + dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n", + dev->nodename); + return 0; fail_put: module_put(drv->driver.owner); @@ -261,13 +326,15 @@ fail: } EXPORT_SYMBOL_GPL(xenbus_dev_probe); -int xenbus_dev_remove(struct device *_dev) +void xenbus_dev_remove(struct device *_dev) { struct xenbus_device *dev = to_xenbus_device(_dev); struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); DPRINTK("%s", dev->nodename); + sysfs_remove_group(&dev->dev.kobj, &xenbus_group); + free_otherend_watch(dev); if (drv->remove) { @@ -289,8 +356,6 @@ int xenbus_dev_remove(struct device *_dev) if (!drv->allow_rebind || xenbus_read_driver_state(dev->nodename) == XenbusStateClosing) xenbus_switch_state(dev, XenbusStateClosed); - - return 0; } EXPORT_SYMBOL_GPL(xenbus_dev_remove); @@ -608,7 +673,7 @@ int xenbus_dev_suspend(struct device *dev) if (drv->suspend) err = drv->suspend(xdev); if (err) - pr_warn("suspend %s failed: %i\n", dev_name(dev), err); + dev_warn(dev, "suspend failed: %i\n", err); return 0; } EXPORT_SYMBOL_GPL(xenbus_dev_suspend); @@ -627,8 +692,7 @@ int xenbus_dev_resume(struct device *dev) drv = to_xenbus_driver(dev->driver); err = talk_to_otherend(xdev); if (err) { - pr_warn("resume (talk_to_otherend) %s failed: %i\n", - dev_name(dev), err); + dev_warn(dev, "resume (talk_to_otherend) failed: %i\n", err); return err; } @@ -637,15 +701,14 @@ int xenbus_dev_resume(struct device *dev) if (drv->resume) { err = drv->resume(xdev); if (err) { - pr_warn("resume %s failed: %i\n", dev_name(dev), err); + dev_warn(dev, "resume failed: %i\n", err); return err; } } err = watch_otherend(xdev); if (err) { - pr_warn("resume (watch_otherend) %s failed: %d.\n", - dev_name(dev), err); + dev_warn(dev, "resume (watch_otherend) failed: %d\n", err); return err; } @@ -684,29 +747,123 @@ void unregister_xenstore_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -void xenbus_probe(struct work_struct *unused) +static void xenbus_probe(void) { xenstored_ready = 1; + if (!xen_store_interface) { + xen_store_interface = memremap(xen_store_gfn << XEN_PAGE_SHIFT, + XEN_PAGE_SIZE, MEMREMAP_WB); + /* + * Now it is safe to free the IRQ used for xenstore late + * initialization. No need to unbind: it is about to be + * bound again from xb_init_comms. Note that calling + * unbind_from_irqhandler now would result in xen_evtchn_close() + * being called and the event channel not being enabled again + * afterwards, resulting in missed event notifications. + */ + free_irq(xs_init_irq, &xb_waitq); + } + + /* + * In the HVM case, xenbus_init() deferred its call to + * xs_init() in case callbacks were not operational yet. + * So do it now. + */ + if (xen_store_domain_type == XS_HVM) + xs_init(); + /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } -EXPORT_SYMBOL_GPL(xenbus_probe); -static int __init xenbus_probe_initcall(void) +/* + * Returns true when XenStore init must be deferred in order to + * allow the PCI platform device to be initialised, before we + * can actually have event channel interrupts working. + */ +static bool xs_hvm_defer_init_for_callback(void) { - if (!xen_domain()) - return -ENODEV; +#ifdef CONFIG_XEN_PVHVM + return xen_store_domain_type == XS_HVM && + !xen_have_vector_callback; +#else + return false; +#endif +} - if (xen_initial_domain() || xen_hvm_domain()) - return 0; +static int xenbus_probe_thread(void *unused) +{ + DEFINE_WAIT(w); + + /* + * We actually just want to wait for *any* trigger of xb_waitq, + * and run xenbus_probe() the moment it occurs. + */ + prepare_to_wait(&xb_waitq, &w, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&xb_waitq, &w); - xenbus_probe(NULL); + DPRINTK("probing"); + xenbus_probe(); return 0; } +static int __init xenbus_probe_initcall(void) +{ + /* + * Probe XenBus here in the XS_PV case, and also XS_HVM unless we + * need to wait for the platform PCI device to come up or + * xen_store_interface is not ready. + */ + if (xen_store_domain_type == XS_PV || + (xen_store_domain_type == XS_HVM && + !xs_hvm_defer_init_for_callback() && + xen_store_interface != NULL)) + xenbus_probe(); + + /* + * For XS_LOCAL or when xen_store_interface is not ready, spawn a + * thread which will wait for xenstored or a xenstore-stubdom to be + * started, then probe. It will be triggered when communication + * starts happening, by waiting on xb_waitq. + */ + if (xen_store_domain_type == XS_LOCAL || xen_store_interface == NULL) { + struct task_struct *probe_task; + + probe_task = kthread_run(xenbus_probe_thread, NULL, + "xenbus_probe"); + if (IS_ERR(probe_task)) + return PTR_ERR(probe_task); + } + return 0; +} device_initcall(xenbus_probe_initcall); +int xen_set_callback_via(uint64_t via) +{ + struct xen_hvm_param a; + int ret; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + a.value = via; + + ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a); + if (ret) + return ret; + + /* + * If xenbus_probe_initcall() deferred the xenbus_probe() + * due to the callback not functioning yet, we can do it now. + */ + if (!xenstored_ready && xs_hvm_defer_init_for_callback()) + xenbus_probe(); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_set_callback_via); + /* Set up event channel for xenstored which is run as a local process * (this is normally used only in dom0) */ @@ -767,10 +924,25 @@ static struct notifier_block xenbus_resume_nb = { .notifier_call = xenbus_resume_cb, }; +static irqreturn_t xenbus_late_init(int irq, void *unused) +{ + int err; + uint64_t v = 0; + + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err || !v || !~v) + return IRQ_HANDLED; + xen_store_gfn = (unsigned long)v; + + wake_up(&xb_waitq); + return IRQ_HANDLED; +} + static int __init xenbus_init(void) { - int err = 0; + int err; uint64_t v = 0; + bool wait = false; xen_store_domain_type = XS_UNKNOWN; if (!xen_domain()) @@ -809,21 +981,69 @@ static int __init xenbus_init(void) err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); if (err) goto out_error; - xen_store_gfn = (unsigned long)v; - xen_store_interface = - xen_remap(xen_store_gfn << XEN_PAGE_SHIFT, - XEN_PAGE_SIZE); + /* + * Uninitialized hvm_params are zero and return no error. + * Although it is theoretically possible to have + * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is + * not zero when valid. If zero, it means that Xenstore hasn't + * been properly initialized. Instead of attempting to map a + * wrong guest physical address return error. + * + * Also recognize all bits set as an invalid/uninitialized value. + */ + if (!v) { + err = -ENOENT; + goto out_error; + } + if (v == ~0ULL) { + wait = true; + } else { + /* Avoid truncation on 32-bit. */ +#if BITS_PER_LONG == 32 + if (v > ULONG_MAX) { + pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n", + __func__, v); + err = -EINVAL; + goto out_error; + } +#endif + xen_store_gfn = (unsigned long)v; + xen_store_interface = + memremap(xen_store_gfn << XEN_PAGE_SHIFT, + XEN_PAGE_SIZE, MEMREMAP_WB); + if (xen_store_interface->connection != XENSTORE_CONNECTED) + wait = true; + } + if (wait) { + err = bind_evtchn_to_irqhandler(xen_store_evtchn, + xenbus_late_init, + 0, "xenstore_late_init", + &xb_waitq); + if (err < 0) { + pr_err("xenstore_late_init couldn't bind irq err=%d\n", + err); + return err; + } + + xs_init_irq = err; + } break; default: pr_warn("Xenstore state unknown\n"); break; } - /* Initialize the interface to xenstore. */ - err = xs_init(); - if (err) { - pr_warn("Error initializing xenstore comms: %i\n", err); - goto out_error; + /* + * HVM domains may not have a functional callback yet. In that + * case let xs_init() be called from xenbus_probe(), which will + * get invoked at an appropriate time. + */ + if (xen_store_domain_type != XS_HVM) { + err = xs_init(); + if (err) { + pr_warn("Error initializing xenstore comms: %i\n", err); + goto out_error; + } } if ((xen_store_domain_type != XS_LOCAL) && @@ -837,8 +1057,10 @@ static int __init xenbus_init(void) */ proc_create_mount_point("xen"); #endif + return 0; out_error: + xen_store_domain_type = XS_UNKNOWN; return err; } diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 9b2fbe69bccc..9c09f89d8278 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -48,7 +48,6 @@ #include <linux/semaphore.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/xen/hypervisor.h> #include <asm/hypervisor.h> #include <xen/xenbus.h> @@ -181,6 +180,12 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, return err; } +static bool frontend_will_handle(struct xenbus_watch *watch, + const char *path, const char *token) +{ + return watch->nr_pending == 0; +} + static void frontend_changed(struct xenbus_watch *watch, const char *path, const char *token) { @@ -192,6 +197,7 @@ static struct xen_bus_type xenbus_backend = { .levels = 3, /* backend/type/<frontend>/<id> */ .get_bus_id = backend_bus_id, .probe = xenbus_probe_backend, + .otherend_will_handle = frontend_will_handle, .otherend_changed = frontend_changed, .bus = { .name = "xen-backend", @@ -299,7 +305,7 @@ static int __init xenbus_probe_backend_init(void) register_xenstore_notifier(&xenstore_notifier); - if (register_shrinker(&backend_memory_shrinker)) + if (register_shrinker(&backend_memory_shrinker, "xen-backend")) pr_warn("shrinker registration failed\n"); return 0; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 8a1650bbe18f..f44d5a64351e 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -19,7 +19,6 @@ #include <linux/module.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/xen/hypervisor.h> #include <xen/xenbus.h> #include <xen/events.h> @@ -41,7 +40,7 @@ static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) return -EINVAL; } - strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); + strscpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); if (!strchr(bus_id, '/')) { pr_warn("bus_id %s no slash\n", bus_id); return -EINVAL; @@ -212,19 +211,11 @@ static int is_device_connecting(struct device *dev, void *data, bool ignore_none if (drv && (dev->driver != drv)) return 0; - if (ignore_nonessential) { - /* With older QEMU, for PVonHVM guests the guest config files - * could contain: vfb = [ 'vnc=1, vnclisten=0.0.0.0'] - * which is nonsensical as there is no PV FB (there can be - * a PVKB) running as HVM guest. */ + xendrv = to_xenbus_driver(dev->driver); - if ((strncmp(xendev->nodename, "device/vkbd", 11) == 0)) - return 0; + if (ignore_nonessential && xendrv->not_essential) + return 0; - if ((strncmp(xendev->nodename, "device/vfb", 10) == 0)) - return 0; - } - xendrv = to_xenbus_driver(dev->driver); return (xendev->state < XenbusStateConnected || (xendev->state == XenbusStateConnected && xendrv->is_ready && !xendrv->is_ready(xendev))); @@ -402,12 +393,12 @@ static void xenbus_reset_frontend(char *fe, char *be, int be_state) case XenbusStateConnected: xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); xenbus_reset_wait_for_backend(be, XenbusStateClosing); - /* fall through */ + fallthrough; case XenbusStateClosing: xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); xenbus_reset_wait_for_backend(be, XenbusStateClosed); - /* fall through */ + fallthrough; case XenbusStateClosed: xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3a06eb699f33..12e02eb01f59 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -705,9 +705,13 @@ int xs_watch_msg(struct xs_watch_event *event) spin_lock(&watches_lock); event->handle = find_watch(event->token); - if (event->handle != NULL) { + if (event->handle != NULL && + (!event->handle->will_handle || + event->handle->will_handle(event->handle, + event->path, event->token))) { spin_lock(&watch_events_lock); list_add_tail(&event->list, &watch_events); + event->handle->nr_pending++; wake_up(&watch_events_waitq); spin_unlock(&watch_events_lock); } else @@ -765,6 +769,8 @@ int register_xenbus_watch(struct xenbus_watch *watch) sprintf(token, "%lX", (long)watch); + watch->nr_pending = 0; + down_read(&xs_watch_rwsem); spin_lock(&watches_lock); @@ -814,11 +820,14 @@ void unregister_xenbus_watch(struct xenbus_watch *watch) /* Cancel pending watch events. */ spin_lock(&watch_events_lock); - list_for_each_entry_safe(event, tmp, &watch_events, list) { - if (event->handle != watch) - continue; - list_del(&event->list); - kfree(event); + if (watch->nr_pending) { + list_for_each_entry_safe(event, tmp, &watch_events, list) { + if (event->handle != watch) + continue; + list_del(&event->list); + kfree(event); + } + watch->nr_pending = 0; } spin_unlock(&watch_events_lock); @@ -865,7 +874,6 @@ void xs_suspend_cancel(void) static int xenwatch_thread(void *unused) { - struct list_head *ent; struct xs_watch_event *event; xenwatch_pid = current->pid; @@ -880,13 +888,15 @@ static int xenwatch_thread(void *unused) mutex_lock(&xenwatch_mutex); spin_lock(&watch_events_lock); - ent = watch_events.next; - if (ent != &watch_events) - list_del(ent); + event = list_first_entry_or_null(&watch_events, + struct xs_watch_event, list); + if (event) { + list_del(&event->list); + event->handle->nr_pending--; + } spin_unlock(&watch_events_lock); - if (ent != &watch_events) { - event = list_entry(ent, struct xs_watch_event, list); + if (event) { event->handle->callback(event->handle, event->path, event->token); kfree(event); |