From 814d04e7dfc4a9cf7e36656afe2da5c0c08dde2b Mon Sep 17 00:00:00 2001 From: Valentin Priescu Date: Tue, 20 May 2014 22:28:50 +0200 Subject: xen-blkback: defer freeing blkif to avoid blocking xenwatch Currently xenwatch blocks in VBD disconnect, waiting for all pending I/O requests to finish. If the VBD is attached to a hot-swappable disk, then xenwatch can hang for a long period of time, stalling other watches. INFO: task xenwatch:39 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. ffff880057f01bd0 0000000000000246 ffff880057f01ac0 ffffffff810b0782 ffff880057f01ad0 00000000000131c0 0000000000000004 ffff880057edb040 ffff8800344c6080 0000000000000000 ffff880058c00ba0 ffff880057edb040 Call Trace: [] ? irq_to_desc+0x12/0x20 [] ? list_del+0x11/0x40 [] ? wait_for_common+0x60/0x160 [] ? _raw_spin_lock_irqsave+0x2f/0x50 [] ? _raw_spin_unlock_irqrestore+0x19/0x20 [] schedule+0x3a/0x60 [] xen_blkif_disconnect+0x8a/0x100 [xen_blkback] [] ? wake_up_bit+0x40/0x40 [] xen_blkbk_remove+0xae/0x1e0 [xen_blkback] [] xenbus_dev_remove+0x44/0x90 [] __device_release_driver+0x77/0xd0 [] device_release_driver+0x28/0x40 [] bus_remove_device+0x78/0xe0 [] device_del+0x12f/0x1a0 [] device_unregister+0x1d/0x60 [] frontend_changed+0xa6/0x4d0 [xen_blkback] [] ? frontend_changed+0x192/0x650 [xen_netback] [] ? cmp_dev+0x60/0x60 [] ? bus_for_each_dev+0x94/0xa0 [] xenbus_otherend_changed+0xbe/0x120 [] frontend_changed+0xb/0x10 [] xenwatch_thread+0xf2/0x130 [] ? wake_up_bit+0x40/0x40 [] ? xenbus_directory+0x80/0x80 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? int_ret_from_sys_call+0x7/0x1b [] ? retint_restore_args+0x5/0x6 [] ? gs_change+0x13/0x13 With this patch, when there is still pending I/O, the actual disconnect is done by the last reference holder (last pending I/O request). In this case, xenwatch doesn't block indefinitely. Signed-off-by: Valentin Priescu Reviewed-by: Steven Kady Reviewed-by: Steven Noonan Reviewed-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 4 ++-- drivers/block/xen-blkback/xenbus.c | 46 ++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 14 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index be052773ad03..f65b807e3236 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -314,7 +314,7 @@ struct xen_blkif { unsigned long long st_rd_sect; unsigned long long st_wr_sect; - wait_queue_head_t waiting_to_free; + struct work_struct free_work; /* Thread shutdown wait queue. */ wait_queue_head_t shutdown_wq; }; @@ -361,7 +361,7 @@ struct pending_req { #define xen_blkif_put(_b) \ do { \ if (atomic_dec_and_test(&(_b)->refcnt)) \ - wake_up(&(_b)->waiting_to_free);\ + schedule_work(&(_b)->free_work);\ } while (0) struct phys_req { diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index a71ecf5f4283..3a8b810b4980 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -35,12 +35,26 @@ static void connect(struct backend_info *); static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, unsigned int); +static void xen_blkif_free(struct xen_blkif *blkif); +static void xen_vbd_free(struct xen_vbd *vbd); struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) { return be->dev; } +/* + * The last request could free the device from softirq context and + * xen_blkif_free() can sleep. + */ +static void xen_blkif_deferred_free(struct work_struct *work) +{ + struct xen_blkif *blkif; + + blkif = container_of(work, struct xen_blkif, free_work); + xen_blkif_free(blkif); +} + static int blkback_name(struct xen_blkif *blkif, char *buf) { char *devpath, *devname; @@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) init_completion(&blkif->drain_complete); atomic_set(&blkif->drain, 0); blkif->st_print = jiffies; - init_waitqueue_head(&blkif->waiting_to_free); blkif->persistent_gnts.rb_node = NULL; spin_lock_init(&blkif->free_pages_lock); INIT_LIST_HEAD(&blkif->free_pages); @@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants); INIT_LIST_HEAD(&blkif->pending_free); + INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); for (i = 0; i < XEN_BLKIF_REQS; i++) { req = kzalloc(sizeof(*req), GFP_KERNEL); @@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, return 0; } -static void xen_blkif_disconnect(struct xen_blkif *blkif) +static int xen_blkif_disconnect(struct xen_blkif *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); @@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) blkif->xenblkd = NULL; } - atomic_dec(&blkif->refcnt); - wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); - atomic_inc(&blkif->refcnt); + /* The above kthread_stop() guarantees that at this point we + * don't have any discard_io or other_io requests. So, checking + * for inflight IO is enough. + */ + if (atomic_read(&blkif->inflight) > 0) + return -EBUSY; if (blkif->irq) { unbind_from_irqhandler(blkif->irq, blkif); @@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); blkif->blk_rings.common.sring = NULL; } + + return 0; } static void xen_blkif_free(struct xen_blkif *blkif) @@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif) struct pending_req *req, *n; int i = 0, j; - if (!atomic_dec_and_test(&blkif->refcnt)) - BUG(); + xen_blkif_disconnect(blkif); + xen_vbd_free(&blkif->vbd); /* Remove all persistent grants and the cache of ballooned pages. */ xen_blkbk_free_caches(blkif); @@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev) be->backend_watch.node = NULL; } + dev_set_drvdata(&dev->dev, NULL); + if (be->blkif) { xen_blkif_disconnect(be->blkif); - xen_vbd_free(&be->blkif->vbd); - xen_blkif_free(be->blkif); - be->blkif = NULL; + xen_blkif_put(be->blkif); } kfree(be->mode); kfree(be); - dev_set_drvdata(&dev->dev, NULL); return 0; } @@ -705,7 +723,11 @@ static void frontend_changed(struct xenbus_device *dev, * Enforce precondition before potential leak point. * xen_blkif_disconnect() is idempotent. */ - xen_blkif_disconnect(be->blkif); + err = xen_blkif_disconnect(be->blkif); + if (err) { + xenbus_dev_fatal(dev, err, "pending I/O"); + break; + } err = connect_ring(be); if (err) -- cgit v1.2.3-59-g8ed1b