diff options
Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r-- | drivers/block/xen-blkfront.c | 239 |
1 files changed, 119 insertions, 120 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ca13df854639..be4fea6a5dd3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -196,6 +196,7 @@ struct blkfront_info unsigned int nr_ring_pages; struct request_queue *rq; unsigned int feature_flush; + unsigned int feature_fua; unsigned int feature_discard:1; unsigned int feature_secdiscard:1; unsigned int discard_granularity; @@ -207,6 +208,9 @@ struct blkfront_info struct blk_mq_tag_set tag_set; struct blkfront_ring_info *rinfo; unsigned int nr_rings; + /* Save uncomplete reqs and bios for migration. */ + struct list_head requests; + struct bio_list bio_list; }; static unsigned int nr_minors; @@ -544,7 +548,7 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf ring_req->u.discard.nr_sectors = blk_rq_sectors(req); ring_req->u.discard.id = id; ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); - if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) + if (req_op(req) == REQ_OP_SECURE_ERASE && info->feature_secdiscard) ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; else ring_req->u.discard.flag = 0; @@ -743,7 +747,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri * The indirect operation can only be a BLKIF_OP_READ or * BLKIF_OP_WRITE */ - BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); + BUG_ON(req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA); ring_req->operation = BLKIF_OP_INDIRECT; ring_req->u.indirect.indirect_op = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; @@ -755,7 +759,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri ring_req->u.rw.handle = info->handle; ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) { /* * Ideally we can do an unordered flush-to-disk. * In case the backend onlysupports barriers, use that. @@ -763,19 +767,14 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri * implement it the same way. (It's also a FLUSH+FUA, * since it is guaranteed ordered WRT previous writes.) */ - switch (info->feature_flush & - ((REQ_FLUSH|REQ_FUA))) { - case REQ_FLUSH|REQ_FUA: + if (info->feature_flush && info->feature_fua) ring_req->operation = BLKIF_OP_WRITE_BARRIER; - break; - case REQ_FLUSH: + else if (info->feature_flush) ring_req->operation = BLKIF_OP_FLUSH_DISKCACHE; - break; - default: + else ring_req->operation = 0; - } } ring_req->u.rw.nr_segments = num_grant; if (unlikely(require_extra_req)) { @@ -844,7 +843,8 @@ static int blkif_queue_request(struct request *req, struct blkfront_ring_info *r if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED)) return 1; - if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) + if (unlikely(req_op(req) == REQ_OP_DISCARD || + req_op(req) == REQ_OP_SECURE_ERASE)) return blkif_queue_discard_req(req, rinfo); else return blkif_queue_rw_req(req, rinfo); @@ -864,18 +864,22 @@ static inline bool blkif_request_flush_invalid(struct request *req, struct blkfront_info *info) { return ((req->cmd_type != REQ_TYPE_FS) || - ((req->cmd_flags & REQ_FLUSH) && - !(info->feature_flush & REQ_FLUSH)) || + ((req_op(req) == REQ_OP_FLUSH) && + !info->feature_flush) || ((req->cmd_flags & REQ_FUA) && - !(info->feature_flush & REQ_FUA))); + !info->feature_fua)); } static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *qd) { unsigned long flags; - struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data; + int qid = hctx->queue_num; + struct blkfront_info *info = hctx->queue->queuedata; + struct blkfront_ring_info *rinfo = NULL; + BUG_ON(info->nr_rings <= qid); + rinfo = &info->rinfo[qid]; blk_mq_start_request(qd->rq); spin_lock_irqsave(&rinfo->ring_lock, flags); if (RING_FULL(&rinfo->ring)) @@ -901,20 +905,9 @@ out_busy: return BLK_MQ_RQ_QUEUE_BUSY; } -static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, - unsigned int index) -{ - struct blkfront_info *info = (struct blkfront_info *)data; - - BUG_ON(info->nr_rings <= index); - hctx->driver_data = &info->rinfo[index]; - return 0; -} - static struct blk_mq_ops blkfront_mq_ops = { .queue_rq = blkif_queue_rq, .map_queue = blk_mq_map_queue, - .init_hctx = blk_mq_init_hctx, }; static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, @@ -950,6 +943,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, return PTR_ERR(rq); } + rq->queuedata = info; queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { @@ -958,7 +952,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq); + queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); } /* Hard sector size and max sectors impersonate the equiv. hardware. */ @@ -984,24 +978,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, return 0; } -static const char *flush_info(unsigned int feature_flush) +static const char *flush_info(struct blkfront_info *info) { - switch (feature_flush & ((REQ_FLUSH | REQ_FUA))) { - case REQ_FLUSH|REQ_FUA: + if (info->feature_flush && info->feature_fua) return "barrier: enabled;"; - case REQ_FLUSH: + else if (info->feature_flush) return "flush diskcache: enabled;"; - default: + else return "barrier or flush: disabled;"; - } } static void xlvbd_flush(struct blkfront_info *info) { - blk_queue_write_cache(info->rq, info->feature_flush & REQ_FLUSH, - info->feature_flush & REQ_FUA); + blk_queue_write_cache(info->rq, info->feature_flush ? true : false, + info->feature_fua ? true : false); pr_info("blkfront: %s: %s %s %s %s %s\n", - info->gd->disk_name, flush_info(info->feature_flush), + info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? "enabled;" : "disabled;", "indirect descriptors:", info->max_indirect_segments ? "enabled;" : "disabled;"); @@ -1142,7 +1134,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->first_minor = minor; gd->fops = &xlvbd_block_fops; gd->private_data = info; - gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, @@ -1600,7 +1591,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); - queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); + queue_flag_clear(QUEUE_FLAG_SECERASE, rq); } blk_mq_complete_request(req, error); break; @@ -1620,6 +1611,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) if (unlikely(error)) { if (error == -EOPNOTSUPP) error = 0; + info->feature_fua = 0; info->feature_flush = 0; xlvbd_flush(info); } @@ -2008,69 +2000,22 @@ static int blkif_recover(struct blkfront_info *info) { unsigned int i, r_index; struct request *req, *n; - struct blk_shadow *copy; int rc; struct bio *bio, *cloned_bio; - struct bio_list bio_list, merge_bio; unsigned int segs, offset; int pending, size; struct split_bio *split_bio; - struct list_head requests; blkfront_gather_backend_features(info); segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_max_segments(info->rq, segs); - bio_list_init(&bio_list); - INIT_LIST_HEAD(&requests); for (r_index = 0; r_index < info->nr_rings; r_index++) { - struct blkfront_ring_info *rinfo; - - rinfo = &info->rinfo[r_index]; - /* Stage 1: Make a safe copy of the shadow state. */ - copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow), - GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); - if (!copy) - return -ENOMEM; - - /* Stage 2: Set up free list. */ - memset(&rinfo->shadow, 0, sizeof(rinfo->shadow)); - for (i = 0; i < BLK_RING_SIZE(info); i++) - rinfo->shadow[i].req.u.rw.id = i+1; - rinfo->shadow_free = rinfo->ring.req_prod_pvt; - rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; + struct blkfront_ring_info *rinfo = &info->rinfo[r_index]; rc = blkfront_setup_indirect(rinfo); - if (rc) { - kfree(copy); + if (rc) return rc; - } - - for (i = 0; i < BLK_RING_SIZE(info); i++) { - /* Not in use? */ - if (!copy[i].request) - continue; - - /* - * Get the bios in the request so we can re-queue them. - */ - if (copy[i].request->cmd_flags & - (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { - /* - * Flush operations don't contain bios, so - * we need to requeue the whole request - */ - list_add(©[i].request->queuelist, &requests); - continue; - } - merge_bio.head = copy[i].request->bio; - merge_bio.tail = copy[i].request->biotail; - bio_list_merge(&bio_list, &merge_bio); - copy[i].request->bio = NULL; - blk_end_request_all(copy[i].request, 0); - } - - kfree(copy); } xenbus_switch_state(info->xbdev, XenbusStateConnected); @@ -2085,7 +2030,7 @@ static int blkif_recover(struct blkfront_info *info) kick_pending_request_queues(rinfo); } - list_for_each_entry_safe(req, n, &requests, queuelist) { + list_for_each_entry_safe(req, n, &info->requests, queuelist) { /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); BUG_ON(req->nr_phys_segments > segs); @@ -2093,7 +2038,7 @@ static int blkif_recover(struct blkfront_info *info) } blk_mq_kick_requeue_list(info->rq); - while ((bio = bio_list_pop(&bio_list)) != NULL) { + while ((bio = bio_list_pop(&info->bio_list)) != NULL) { /* Traverse the list of pending bios and re-queue them */ if (bio_segments(bio) > segs) { /* @@ -2114,7 +2059,7 @@ static int blkif_recover(struct blkfront_info *info) bio_trim(cloned_bio, offset, size); cloned_bio->bi_private = split_bio; cloned_bio->bi_end_io = split_bio_end; - submit_bio(cloned_bio->bi_rw, cloned_bio); + submit_bio(cloned_bio); } /* * Now we have to wait for all those smaller bios to @@ -2123,7 +2068,7 @@ static int blkif_recover(struct blkfront_info *info) continue; } /* We don't need to split this bio */ - submit_bio(bio->bi_rw, bio); + submit_bio(bio); } return 0; @@ -2139,9 +2084,47 @@ static int blkfront_resume(struct xenbus_device *dev) { struct blkfront_info *info = dev_get_drvdata(&dev->dev); int err = 0; + unsigned int i, j; dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); + bio_list_init(&info->bio_list); + INIT_LIST_HEAD(&info->requests); + for (i = 0; i < info->nr_rings; i++) { + struct blkfront_ring_info *rinfo = &info->rinfo[i]; + struct bio_list merge_bio; + struct blk_shadow *shadow = rinfo->shadow; + + for (j = 0; j < BLK_RING_SIZE(info); j++) { + /* Not in use? */ + if (!shadow[j].request) + continue; + + /* + * Get the bios in the request so we can re-queue them. + */ + if (req_op(shadow[i].request) == REQ_OP_FLUSH || + req_op(shadow[i].request) == REQ_OP_DISCARD || + req_op(shadow[i].request) == REQ_OP_SECURE_ERASE || + shadow[j].request->cmd_flags & REQ_FUA) { + /* + * Flush operations don't contain bios, so + * we need to requeue the whole request + * + * XXX: but this doesn't make any sense for a + * write with the FUA flag set.. + */ + list_add(&shadow[j].request->queuelist, &info->requests); + continue; + } + merge_bio.head = shadow[j].request->bio; + merge_bio.tail = shadow[j].request->biotail; + bio_list_merge(&info->bio_list, &merge_bio); + shadow[j].request->bio = NULL; + blk_mq_end_request(shadow[j].request, 0); + } + } + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); err = negotiate_mq(info); @@ -2149,6 +2132,8 @@ static int blkfront_resume(struct xenbus_device *dev) return err; err = talk_to_blkback(dev, info); + if (!err) + blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings); /* * We have to wait for the backend to switch to @@ -2212,10 +2197,9 @@ static void blkfront_setup_discard(struct blkfront_info *info) info->discard_granularity = discard_granularity; info->discard_alignment = discard_alignment; } - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "discard-secure", "%d", &discard_secure, - NULL); - if (!err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "discard-secure", "%u", &discard_secure); + if (err > 0) info->feature_secdiscard = !!discard_secure; } @@ -2313,10 +2297,10 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) unsigned int indirect_segments; info->feature_flush = 0; + info->feature_fua = 0; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-barrier", "%d", &barrier, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-barrier", "%d", &barrier); /* * If there's no "feature-barrier" defined, then it means @@ -2325,38 +2309,40 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) * * If there are barriers, then we use flush. */ - if (!err && barrier) - info->feature_flush = REQ_FLUSH | REQ_FUA; + if (err > 0 && barrier) { + info->feature_flush = 1; + info->feature_fua = 1; + } + /* * And if there is "feature-flush-cache" use that above * barriers. */ - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-flush-cache", "%d", &flush, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-flush-cache", "%d", &flush); - if (!err && flush) - info->feature_flush = REQ_FLUSH; + if (err > 0 && flush) { + info->feature_flush = 1; + info->feature_fua = 0; + } - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-discard", "%d", &discard, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-discard", "%d", &discard); - if (!err && discard) + if (err > 0 && discard) blkfront_setup_discard(info); - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-persistent", "%u", &persistent, - NULL); - if (err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-persistent", "%d", &persistent); + if (err <= 0) info->feature_persistent = 0; else info->feature_persistent = persistent; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-max-indirect-segments", "%u", &indirect_segments, - NULL); - if (err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-max-indirect-segments", "%u", + &indirect_segments); + if (err <= 0) info->max_indirect_segments = 0; else info->max_indirect_segments = min(indirect_segments, @@ -2456,7 +2442,7 @@ static void blkfront_connect(struct blkfront_info *info) for (i = 0; i < info->nr_rings; i++) kick_pending_request_queues(&info->rinfo[i]); - add_disk(info->gd); + device_add_disk(&info->xbdev->dev, info->gd); info->is_ready = 1; } @@ -2485,10 +2471,23 @@ static void blkback_changed(struct xenbus_device *dev, break; case XenbusStateConnected: - if (dev->state != XenbusStateInitialised) { + /* + * talk_to_blkback sets state to XenbusStateInitialised + * and blkfront_connect sets it to XenbusStateConnected + * (if connection went OK). + * + * If the backend (or toolstack) decides to poke at backend + * state (and re-trigger the watch by setting the state repeatedly + * to XenbusStateConnected (4)) we need to deal with this. + * This is allowed as this is used to communicate to the guest + * that the size of disk has changed! + */ + if ((dev->state != XenbusStateInitialised) && + (dev->state != XenbusStateConnected)) { if (talk_to_blkback(dev, info)) break; } + blkfront_connect(info); break; |