about summary refs log tree commit diff stats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/nvme-core.c 158
-rw-r--r-- drivers/block/rbd.c 25
-rw-r--r-- drivers/block/xen-blkback/blkback.c 177
-rw-r--r-- drivers/block/xen-blkback/common.h 3
4 files changed, 247 insertions, 116 deletions
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index f7d083bb3bd5..d826bf3e62c8 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -106,7 +106,7 @@ struct nvme_queue {
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
u16 q_depth;
- u16 cq_vector;
+ s16 cq_vector;
u16 sq_head;
u16 sq_tail;
u16 cq_head;
@@ -215,6 +215,7 @@ static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
cmd->fn = handler;
cmd->ctx = ctx;
cmd->aborted = 0;
+ blk_mq_start_request(blk_mq_rq_from_pdu(cmd));
}
/* Special values must be less than 0x1000 */
@@ -431,8 +432,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
if (unlikely(status)) {
if (!(status & NVME_SC_DNR || blk_noretry_request(req))
&& (jiffies - req->start_time) < req->timeout) {
+ unsigned long flags;
+
blk_mq_requeue_request(req);
- blk_mq_kick_requeue_list(req->q);
+ spin_lock_irqsave(req->q->queue_lock, flags);
+ if (!blk_queue_stopped(req->q))
+ blk_mq_kick_requeue_list(req->q);
+ spin_unlock_irqrestore(req->q->queue_lock, flags);
return;
}
req->errors = nvme_error_status(status);
@@ -664,8 +670,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
}
}
- blk_mq_start_request(req);
-
nvme_set_info(cmd, iod, req_completion);
spin_lock_irq(&nvmeq->q_lock);
if (req->cmd_flags & REQ_DISCARD)
@@ -835,6 +839,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
if (IS_ERR(req))
return PTR_ERR(req);
+ req->cmd_flags |= REQ_NO_TIMEOUT;
cmd_info = blk_mq_rq_to_pdu(req);
nvme_set_info(cmd_info, req, async_req_completion);
@@ -1016,14 +1021,19 @@ static void nvme_abort_req(struct request *req)
struct nvme_command cmd;
if (!nvmeq->qid || cmd_rq->aborted) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_list_lock, flags);
if (work_busy(&dev->reset_work))
- return;
+ goto out;
list_del_init(&dev->node);
dev_warn(&dev->pci_dev->dev,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
dev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &dev->reset_work);
+ out:
+ spin_unlock_irqrestore(&dev_list_lock, flags);
return;
}
@@ -1064,15 +1074,22 @@ static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
void *ctx;
nvme_completion_fn fn;
struct nvme_cmd_info *cmd;
- static struct nvme_completion cqe = {
- .status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
- };
+ struct nvme_completion cqe;
+
+ if (!blk_mq_request_started(req))
+ return;
cmd = blk_mq_rq_to_pdu(req);
if (cmd->ctx == CMD_CTX_CANCELLED)
return;
+ if (blk_queue_dying(req->q))
+ cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+ else
+ cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
+
+
dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
req->tag, nvmeq->qid);
ctx = cancel_cmd_info(cmd, &fn);
@@ -1084,17 +1101,29 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = cmd->nvmeq;
- dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
- nvmeq->qid);
- if (nvmeq->dev->initialized)
- nvme_abort_req(req);
-
/*
* The aborted req will be completed on receiving the abort req.
* We enable the timer again. If hit twice, it'll cause a device reset,
* as the device then is in a faulty state.
*/
- return BLK_EH_RESET_TIMER;
+ int ret = BLK_EH_RESET_TIMER;
+
+ dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
+ nvmeq->qid);
+
+ spin_lock_irq(&nvmeq->q_lock);
+ if (!nvmeq->dev->initialized) {
+ /*
+ * Forcibly cancelling the command frees the request, so we must
+ * return BLK_EH_NOT_HANDLED.
+ */
+ nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
+ ret = BLK_EH_NOT_HANDLED;
+ } else
+ nvme_abort_req(req);
+ spin_unlock_irq(&nvmeq->q_lock);
+
+ return ret;
}
static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1175,6 +1204,8 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
adapter_delete_sq(dev, qid);
adapter_delete_cq(dev, qid);
}
+ if (!qid && dev->admin_q)
+ blk_mq_freeze_queue_start(dev->admin_q);
nvme_clear_queue(nvmeq);
}
@@ -1361,6 +1392,14 @@ static struct blk_mq_ops nvme_mq_ops = {
.timeout = nvme_timeout,
};
+static void nvme_dev_remove_admin(struct nvme_dev *dev)
+{
+ if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
+ blk_cleanup_queue(dev->admin_q);
+ blk_mq_free_tag_set(&dev->admin_tagset);
+ }
+}
+
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
{
if (!dev->admin_q) {
@@ -1380,17 +1419,16 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
blk_mq_free_tag_set(&dev->admin_tagset);
return -ENOMEM;
}
- }
+ if (!blk_get_queue(dev->admin_q)) {
+ nvme_dev_remove_admin(dev);
+ return -ENODEV;
+ }
+ } else
+ blk_mq_unfreeze_queue(dev->admin_q);
return 0;
}
-static void nvme_free_admin_tags(struct nvme_dev *dev)
-{
- if (dev->admin_q)
- blk_mq_free_tag_set(&dev->admin_tagset);
-}
-
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
int result;
@@ -1445,19 +1483,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
if (result)
goto free_nvmeq;
- result = nvme_alloc_admin_tags(dev);
- if (result)
- goto free_nvmeq;
-
nvmeq->cq_vector = 0;
result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
if (result)
- goto free_tags;
+ goto free_nvmeq;
return result;
- free_tags:
- nvme_free_admin_tags(dev);
free_nvmeq:
nvme_free_queues(dev, 0);
return result;
@@ -2242,13 +2274,18 @@ static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
break;
if (!schedule_timeout(ADMIN_TIMEOUT) ||
fatal_signal_pending(current)) {
+ /*
+ * Disable the controller first since we can't trust it
+ * at this point, but leave the admin queue enabled
+ * until all queue deletion requests are flushed.
+ * FIXME: This may take a while if there are more h/w
+ * queues than admin tags.
+ */
set_current_state(TASK_RUNNING);
-
nvme_disable_ctrl(dev, readq(&dev->bar->cap));
- nvme_disable_queue(dev, 0);
-
- send_sig(SIGKILL, dq->worker->task, 1);
+ nvme_clear_queue(dev->queues[0]);
flush_kthread_worker(dq->worker);
+ nvme_disable_queue(dev, 0);
return;
}
}
@@ -2325,7 +2362,6 @@ static void nvme_del_queue_start(struct kthread_work *work)
{
struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
cmdinfo.work);
- allow_signal(SIGKILL);
if (nvme_delete_sq(nvmeq))
nvme_del_queue_end(nvmeq);
}
@@ -2383,6 +2419,34 @@ static void nvme_dev_list_remove(struct nvme_dev *dev)
kthread_stop(tmp);
}
+static void nvme_freeze_queues(struct nvme_dev *dev)
+{
+ struct nvme_ns *ns;
+
+ list_for_each_entry(ns, &dev->namespaces, list) {
+ blk_mq_freeze_queue_start(ns->queue);
+
+ spin_lock(ns->queue->queue_lock);
+ queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
+ spin_unlock(ns->queue->queue_lock);
+
+ blk_mq_cancel_requeue_work(ns->queue);
+ blk_mq_stop_hw_queues(ns->queue);
+ }
+}
+
+static void nvme_unfreeze_queues(struct nvme_dev *dev)
+{
+ struct nvme_ns *ns;
+
+ list_for_each_entry(ns, &dev->namespaces, list) {
+ queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
+ blk_mq_unfreeze_queue(ns->queue);
+ blk_mq_start_stopped_hw_queues(ns->queue, true);
+ blk_mq_kick_requeue_list(ns->queue);
+ }
+}
+
static void nvme_dev_shutdown(struct nvme_dev *dev)
{
int i;
@@ -2391,8 +2455,10 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
dev->initialized = 0;
nvme_dev_list_remove(dev);
- if (dev->bar)
+ if (dev->bar) {
+ nvme_freeze_queues(dev);
csts = readl(&dev->bar->csts);
+ }
if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
for (i = dev->queue_count - 1; i >= 0; i--) {
struct nvme_queue *nvmeq = dev->queues[i];
@@ -2407,12 +2473,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
nvme_dev_unmap(dev);
}
-static void nvme_dev_remove_admin(struct nvme_dev *dev)
-{
- if (dev->admin_q && !blk_queue_dying(dev->admin_q))
- blk_cleanup_queue(dev->admin_q);
-}
-
static void nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns;
@@ -2420,8 +2480,10 @@ static void nvme_dev_remove(struct nvme_dev *dev)
list_for_each_entry(ns, &dev->namespaces, list) {
if (ns->disk->flags & GENHD_FL_UP)
del_gendisk(ns->disk);
- if (!blk_queue_dying(ns->queue))
+ if (!blk_queue_dying(ns->queue)) {
+ blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue);
+ }
}
}
@@ -2502,6 +2564,7 @@ static void nvme_free_dev(struct kref *kref)
nvme_free_namespaces(dev);
nvme_release_instance(dev);
blk_mq_free_tag_set(&dev->tagset);
+ blk_put_queue(dev->admin_q);
kfree(dev->queues);
kfree(dev->entry);
kfree(dev);
@@ -2598,15 +2661,20 @@ static int nvme_dev_start(struct nvme_dev *dev)
}
nvme_init_queue(dev->queues[0], 0);
+ result = nvme_alloc_admin_tags(dev);
+ if (result)
+ goto disable;
result = nvme_setup_io_queues(dev);
if (result)
- goto disable;
+ goto free_tags;
nvme_set_irq_hints(dev);
return result;
+ free_tags:
+ nvme_dev_remove_admin(dev);
disable:
nvme_disable_queue(dev, 0);
nvme_dev_list_remove(dev);
@@ -2646,6 +2714,9 @@ static int nvme_dev_resume(struct nvme_dev *dev)
dev->reset_workfn = nvme_remove_disks;
queue_work(nvme_workq, &dev->reset_work);
spin_unlock(&dev_list_lock);
+ } else {
+ nvme_unfreeze_queues(dev);
+ nvme_set_irq_hints(dev);
}
dev->initialized = 1;
return 0;
@@ -2783,11 +2854,10 @@ static void nvme_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
flush_work(&dev->reset_work);
misc_deregister(&dev->miscdev);
- nvme_dev_remove(dev);
nvme_dev_shutdown(dev);
+ nvme_dev_remove(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
- nvme_free_admin_tags(dev);
nvme_release_prp_pools(dev);
kref_put(&dev->kref, nvme_free_dev);
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 3ec85dfce124..8a86b62466f7 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2098,32 +2098,26 @@ static void rbd_dev_parent_put(struct rbd_device *rbd_dev)
* If an image has a non-zero parent overlap, get a reference to its
* parent.
*
- * We must get the reference before checking for the overlap to
- * coordinate properly with zeroing the parent overlap in
- * rbd_dev_v2_parent_info() when an image gets flattened. We
- * drop it again if there is no overlap.
- *
* Returns true if the rbd device has a parent with a non-zero
* overlap and a reference for it was successfully taken, or
* false otherwise.
*/
static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
{
- int counter;
+ int counter = 0;
if (!rbd_dev->parent_spec)
return false;
- counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
- if (counter > 0 && rbd_dev->parent_overlap)
- return true;
-
- /* Image was flattened, but parent is not yet torn down */
+ down_read(&rbd_dev->header_rwsem);
+ if (rbd_dev->parent_overlap)
+ counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
+ up_read(&rbd_dev->header_rwsem);
if (counter < 0)
rbd_warn(rbd_dev, "parent reference overflow");
- return false;
+ return counter > 0;
}
/*
@@ -4239,7 +4233,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
*/
if (rbd_dev->parent_overlap) {
rbd_dev->parent_overlap = 0;
- smp_mb();
rbd_dev_parent_put(rbd_dev);
pr_info("%s: clone image has been flattened\n",
rbd_dev->disk->disk_name);
@@ -4285,7 +4278,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
* treat it specially.
*/
rbd_dev->parent_overlap = overlap;
- smp_mb();
if (!overlap) {
/* A null parent_spec indicates it's the initial probe */
@@ -5114,10 +5106,7 @@ static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
{
struct rbd_image_header *header;
- /* Drop parent reference unless it's already been done (or none) */
-
- if (rbd_dev->parent_overlap)
- rbd_dev_parent_put(rbd_dev);
+ rbd_dev_parent_put(rbd_dev);
/* Free dynamic fields from the header, then zero it out */
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 63fc7f06a014..2a04d341e598 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -47,6 +47,7 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/balloon.h>
+#include <xen/grant_table.h>
#include "common.h"
/*
@@ -100,7 +101,7 @@ module_param(log_stats, int, 0644);
#define BLKBACK_INVALID_HANDLE (~0)
-/* Number of free pages to remove on each call to free_xenballooned_pages */
+/* Number of free pages to remove on each call to gnttab_free_pages */
#define NUM_BATCH_FREE_PAGES 10
static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
@@ -111,7 +112,7 @@ static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
if (list_empty(&blkif->free_pages)) {
BUG_ON(blkif->free_pages_num != 0);
spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
- return alloc_xenballooned_pages(1, page, false);
+ return gnttab_alloc_pages(1, page);
}
BUG_ON(blkif->free_pages_num == 0);
page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
@@ -151,14 +152,14 @@ static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
blkif->free_pages_num--;
if (++num_pages == NUM_BATCH_FREE_PAGES) {
spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
- free_xenballooned_pages(num_pages, page);
+ gnttab_free_pages(num_pages, page);
spin_lock_irqsave(&blkif->free_pages_lock, flags);
num_pages = 0;
}
}
spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
if (num_pages != 0)
- free_xenballooned_pages(num_pages, page);
+ gnttab_free_pages(num_pages, page);
}
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
@@ -262,6 +263,17 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
atomic_dec(&blkif->persistent_gnt_in_use);
}
+static void free_persistent_gnts_unmap_callback(int result,
+ struct gntab_unmap_queue_data *data)
+{
+ struct completion *c = data->data;
+
+ /* BUG_ON used to reproduce existing behaviour,
+ but is this the best way to deal with this? */
+ BUG_ON(result);
+ complete(c);
+}
+
static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
unsigned int num)
{
@@ -269,8 +281,17 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt;
struct rb_node *n;
- int ret = 0;
int segs_to_unmap = 0;
+ struct gntab_unmap_queue_data unmap_data;
+ struct completion unmap_completion;
+
+ init_completion(&unmap_completion);
+
+ unmap_data.data = &unmap_completion;
+ unmap_data.done = &free_persistent_gnts_unmap_callback;
+ unmap_data.pages = pages;
+ unmap_data.unmap_ops = unmap;
+ unmap_data.kunmap_ops = NULL;
foreach_grant_safe(persistent_gnt, n, root, node) {
BUG_ON(persistent_gnt->handle ==
@@ -285,9 +306,11 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
!rb_next(&persistent_gnt->node)) {
- ret = gnttab_unmap_refs(unmap, NULL, pages,
- segs_to_unmap);
- BUG_ON(ret);
+
+ unmap_data.count = segs_to_unmap;
+ gnttab_unmap_refs_async(&unmap_data);
+ wait_for_completion(&unmap_completion);
+
put_free_pages(blkif, pages, segs_to_unmap);
segs_to_unmap = 0;
}
@@ -653,18 +676,14 @@ void xen_blkbk_free_caches(struct xen_blkif *blkif)
shrink_free_pagepool(blkif, 0 /* All */);
}
-/*
- * Unmap the grant references, and also remove the M2P over-rides
- * used in the 'pending_req'.
- */
-static void xen_blkbk_unmap(struct xen_blkif *blkif,
- struct grant_page *pages[],
- int num)
+static unsigned int xen_blkbk_unmap_prepare(
+ struct xen_blkif *blkif,
+ struct grant_page **pages,
+ unsigned int num,
+ struct gnttab_unmap_grant_ref *unmap_ops,
+ struct page **unmap_pages)
{
- struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int i, invcount = 0;
- int ret;
for (i = 0; i < num; i++) {
if (pages[i]->persistent_gnt != NULL) {
@@ -674,21 +693,95 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
continue;
unmap_pages[invcount] = pages[i]->page;
- gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
+ gnttab_set_unmap_op(&unmap_ops[invcount], vaddr(pages[i]->page),
GNTMAP_host_map, pages[i]->handle);
pages[i]->handle = BLKBACK_INVALID_HANDLE;
- if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
- ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
- invcount);
+ invcount++;
+ }
+
+ return invcount;
+}
+
+static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
+{
+ struct pending_req* pending_req = (struct pending_req*) (data->data);
+ struct xen_blkif *blkif = pending_req->blkif;
+
+ /* BUG_ON used to reproduce existing behaviour,
+ but is this the best way to deal with this? */
+ BUG_ON(result);
+
+ put_free_pages(blkif, data->pages, data->count);
+ make_response(blkif, pending_req->id,
+ pending_req->operation, pending_req->status);
+ free_req(blkif, pending_req);
+ /*
+ * Make sure the request is freed before releasing blkif,
+ * or there could be a race between free_req and the
+ * cleanup done in xen_blkif_free during shutdown.
+ *
+ * NB: The fact that we might try to wake up pending_free_wq
+ * before drain_complete (in case there's a drain going on)
+ * is not a problem with our current implementation,
+ * because we can ensure there's no thread waiting on
+ * pending_free_wq if there's a drain going on, but it has
+ * to be taken into account if the current model is changed.
+ */
+ if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+ complete(&blkif->drain_complete);
+ }
+ xen_blkif_put(blkif);
+}
+
+static void xen_blkbk_unmap_and_respond(struct pending_req *req)
+{
+ struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data;
+ struct xen_blkif *blkif = req->blkif;
+ struct grant_page **pages = req->segments;
+ unsigned int invcount;
+
+ invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages,
+ req->unmap, req->unmap_pages);
+
+ work->data = req;
+ work->done = xen_blkbk_unmap_and_respond_callback;
+ work->unmap_ops = req->unmap;
+ work->kunmap_ops = NULL;
+ work->pages = req->unmap_pages;
+ work->count = invcount;
+
+ gnttab_unmap_refs_async(&req->gnttab_unmap_data);
+}
+
+
+/*
+ * Unmap the grant references.
+ *
+ * This could accumulate ops up to the batch size to reduce the number
+ * of hypercalls, but since this is only used in error paths there's
+ * no real need.
+ */
+static void xen_blkbk_unmap(struct xen_blkif *blkif,
+ struct grant_page *pages[],
+ int num)
+{
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int invcount = 0;
+ int ret;
+
+ while (num) {
+ unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+ invcount = xen_blkbk_unmap_prepare(blkif, pages, batch,
+ unmap, unmap_pages);
+ if (invcount) {
+ ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
BUG_ON(ret);
put_free_pages(blkif, unmap_pages, invcount);
- invcount = 0;
}
- }
- if (invcount) {
- ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
- BUG_ON(ret);
- put_free_pages(blkif, unmap_pages, invcount);
+ pages += batch;
+ num -= batch;
}
}
@@ -982,32 +1075,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
* the grant references associated with 'request' and provide
* the proper response on the ring.
*/
- if (atomic_dec_and_test(&pending_req->pendcnt)) {
- struct xen_blkif *blkif = pending_req->blkif;
-
- xen_blkbk_unmap(blkif,
- pending_req->segments,
- pending_req->nr_pages);
- make_response(blkif, pending_req->id,
- pending_req->operation, pending_req->status);
- free_req(blkif, pending_req);
- /*
- * Make sure the request is freed before releasing blkif,
- * or there could be a race between free_req and the
- * cleanup done in xen_blkif_free during shutdown.
- *
- * NB: The fact that we might try to wake up pending_free_wq
- * before drain_complete (in case there's a drain going on)
- * it's not a problem with our current implementation
- * because we can assure there's no thread waiting on
- * pending_free_wq if there's a drain going on, but it has
- * to be taken into account if the current model is changed.
- */
- if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
- complete(&blkif->drain_complete);
- }
- xen_blkif_put(blkif);
- }
+ if (atomic_dec_and_test(&pending_req->pendcnt))
+ xen_blkbk_unmap_and_respond(pending_req);
}
/*
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index f65b807e3236..cc90a840e616 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -350,6 +350,9 @@ struct pending_req {
struct grant_page *indirect_pages[MAX_INDIRECT_PAGES];
struct seg_buf seg[MAX_INDIRECT_SEGMENTS];
struct bio *biolist[MAX_INDIRECT_SEGMENTS];
+ struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
+ struct page *unmap_pages[MAX_INDIRECT_SEGMENTS];
+ struct gntab_unmap_queue_data gnttab_unmap_data;
};