aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bio.c62
-rw-r--r--block/blk-core.c3
-rw-r--r--block/blk-mq-debugfs.c1
-rw-r--r--block/blk-mq.c16
-rw-r--r--block/bsg-lib.c44
-rw-r--r--block/bsg.c190
6 files changed, 171 insertions, 145 deletions
diff --git a/block/bio.c b/block/bio.c
index 83a2dfa417ca..71a78d9fb8b7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -836,6 +836,40 @@ int bio_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_add_page);
+/*
+ * Add the current segment of a BVEC iterator to @bio.
+ *
+ * The part of iter->bvec that starts at iter->iov_offset, capped at
+ * iter->count, is handed to bio_add_page(). When the full length is
+ * accepted, a reference is taken on each page of the segment, @iter is
+ * advanced by that length and 0 is returned. -EINVAL is returned if
+ * iter->iov_offset lies beyond the segment (warning once) or if
+ * bio_add_page() reports a different length than requested.
+ */
+static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
+{
+	const struct bio_vec *seg = iter->bvec;
+	struct page *pg;
+	unsigned int want;
+	size_t added;
+	int idx;
+
+	if (WARN_ON_ONCE(iter->iov_offset > seg->bv_len))
+		return -EINVAL;
+
+	want = min_t(size_t, seg->bv_len - iter->iov_offset, iter->count);
+	added = bio_add_page(bio, seg->bv_page, want,
+			     seg->bv_offset + iter->iov_offset);
+	if (added != want)
+		return -EINVAL;
+
+	/*
+	 * The plain O_DIRECT case would not need these references, nor the
+	 * matching put when the IO completes. Some in-kernel users do,
+	 * though: splicing to/from a loop device releases the pipe pages
+	 * unconditionally. Once that case is fixed, both this get and the
+	 * bio_release_pages() call at IO completion time can go away.
+	 */
+	mp_bvec_for_each_page(pg, seg, idx)
+		get_page(pg);
+
+	iov_iter_advance(iter, added);
+	return 0;
+}
+
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -884,23 +918,35 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
}
/**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * bio_iov_iter_get_pages - add user or kernel pages to a bio
* @bio: bio to add pages to
- * @iter: iov iterator describing the region to be mapped
+ * @iter: iov iterator describing the region to be added
+ *
+ * This takes either an iterator pointing to user memory, or one pointing to
+ * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
+ * map them into the kernel. On IO completion, the caller should put those
+ * pages. For now, when adding kernel pages, we still grab a reference to the
+ * page. This isn't strictly needed for the common case, but some call paths
+ * end up releasing pages from e.g. a pipe and we can't easily control these.
+ * See comment in __bio_iov_bvec_add_pages().
*
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
* The function tries, but does not guarantee, to pin as many pages as
- * fit into the bio, or are requested in *iter, whatever is smaller.
- * If MM encounters an error pinning the requested pages, it stops.
- * Error is returned only if 0 pages could be pinned.
+ * fit into the bio, or are requested in *iter, whichever is smaller. If
+ * MM encounters an error pinning the requested pages, it stops. Error
+ * is returned only if 0 pages could be pinned.
*/
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
+ const bool is_bvec = iov_iter_is_bvec(iter);
unsigned short orig_vcnt = bio->bi_vcnt;
do {
- int ret = __bio_iov_iter_get_pages(bio, iter);
+ int ret;
+
+ if (is_bvec)
+ ret = __bio_iov_bvec_add_pages(bio, iter);
+ else
+ ret = __bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret))
return bio->bi_vcnt > orig_vcnt ? 0 : ret;
diff --git a/block/blk-core.c b/block/blk-core.c
index 6b78ec56a4f2..4673ebe42255 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -500,8 +500,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (!q->stats)
goto fail_stats;
- q->backing_dev_info->ra_pages =
- (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
+ q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
q->backing_dev_info->name = "block";
q->node = node_id;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index bac34b72b33b..ec1d18cb643c 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -115,7 +115,6 @@ static int queue_pm_only_show(void *data, struct seq_file *m)
static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(STOPPED),
QUEUE_FLAG_NAME(DYING),
- QUEUE_FLAG_NAME(BIDI),
QUEUE_FLAG_NAME(NOMERGES),
QUEUE_FLAG_NAME(SAME_COMP),
QUEUE_FLAG_NAME(FAIL_IO),
diff --git a/block/blk-mq.c b/block/blk-mq.c
index fa024bce2b38..a9c181603cbd 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -331,7 +331,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
#if defined(CONFIG_BLK_DEV_INTEGRITY)
rq->nr_integrity_segments = 0;
#endif
- rq->special = NULL;
/* tag was already set */
rq->extra_len = 0;
WRITE_ONCE(rq->deadline, 0);
@@ -340,7 +339,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
rq->end_io = NULL;
rq->end_io_data = NULL;
- rq->next_rq = NULL;
data->ctx->rq_dispatched[op_is_sync(op)]++;
refcount_set(&rq->ref, 1);
@@ -550,8 +548,6 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
rq_qos_done(rq->q, rq);
rq->end_io(rq, error);
} else {
- if (unlikely(blk_bidi_rq(rq)))
- blk_mq_free_request(rq->next_rq);
blk_mq_free_request(rq);
}
}
@@ -737,12 +733,20 @@ static void blk_mq_requeue_work(struct work_struct *work)
spin_unlock_irq(&q->requeue_lock);
list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
- if (!(rq->rq_flags & RQF_SOFTBARRIER))
+ if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
continue;
rq->rq_flags &= ~RQF_SOFTBARRIER;
list_del_init(&rq->queuelist);
- blk_mq_sched_insert_request(rq, true, false, false);
+ /*
+ * If RQF_DONTPREP, rq has contained some driver specific
+ * data, so insert it to hctx dispatch list to avoid any
+ * merge.
+ */
+ if (rq->rq_flags & RQF_DONTPREP)
+ blk_mq_request_bypass_insert(rq, false);
+ else
+ blk_mq_sched_insert_request(rq, true, false, false);
}
while (!list_empty(&rq_list)) {
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 192129856342..005e2b75d775 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -51,11 +51,40 @@ static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
fmode_t mode)
{
struct bsg_job *job = blk_mq_rq_to_pdu(rq);
+ int ret;
job->request_len = hdr->request_len;
job->request = memdup_user(uptr64(hdr->request), hdr->request_len);
+ if (IS_ERR(job->request))
+ return PTR_ERR(job->request);
+
+ if (hdr->dout_xfer_len && hdr->din_xfer_len) {
+ job->bidi_rq = blk_get_request(rq->q, REQ_OP_SCSI_IN, 0);
+ if (IS_ERR(job->bidi_rq)) {
+ ret = PTR_ERR(job->bidi_rq);
+ goto out;
+ }
+
+ ret = blk_rq_map_user(rq->q, job->bidi_rq, NULL,
+ uptr64(hdr->din_xferp), hdr->din_xfer_len,
+ GFP_KERNEL);
+ if (ret)
+ goto out_free_bidi_rq;
+
+ job->bidi_bio = job->bidi_rq->bio;
+ } else {
+ job->bidi_rq = NULL;
+ job->bidi_bio = NULL;
+ }
- return PTR_ERR_OR_ZERO(job->request);
+ return 0;
+
+out_free_bidi_rq:
+ if (job->bidi_rq)
+ blk_put_request(job->bidi_rq);
+out:
+ kfree(job->request);
+ return ret;
}
static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
@@ -93,7 +122,7 @@ static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
/* we assume all request payload was transferred, residual == 0 */
hdr->dout_resid = 0;
- if (rq->next_rq) {
+ if (job->bidi_rq) {
unsigned int rsp_len = job->reply_payload.payload_len;
if (WARN_ON(job->reply_payload_rcv_len > rsp_len))
@@ -111,6 +140,11 @@ static void bsg_transport_free_rq(struct request *rq)
{
struct bsg_job *job = blk_mq_rq_to_pdu(rq);
+ if (job->bidi_rq) {
+ blk_rq_unmap_user(job->bidi_bio);
+ blk_put_request(job->bidi_rq);
+ }
+
kfree(job->request);
}
@@ -200,7 +234,6 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
*/
static bool bsg_prepare_job(struct device *dev, struct request *req)
{
- struct request *rsp = req->next_rq;
struct bsg_job *job = blk_mq_rq_to_pdu(req);
int ret;
@@ -211,8 +244,8 @@ static bool bsg_prepare_job(struct device *dev, struct request *req)
if (ret)
goto failjob_rls_job;
}
- if (rsp && rsp->bio) {
- ret = bsg_map_buffer(&job->reply_payload, rsp);
+ if (job->bidi_rq) {
+ ret = bsg_map_buffer(&job->reply_payload, job->bidi_rq);
if (ret)
goto failjob_rls_rqst_payload;
}
@@ -369,7 +402,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
}
q->queuedata = dev;
- blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
ret = bsg_register_queue(q, dev, name, &bsg_transport_ops);
diff --git a/block/bsg.c b/block/bsg.c
index 50e5f8f666f2..f306853c6b08 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -74,6 +74,11 @@ static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
{
struct scsi_request *sreq = scsi_req(rq);
+ if (hdr->dout_xfer_len && hdr->din_xfer_len) {
+ pr_warn_once("BIDI support in bsg has been removed.\n");
+ return -EOPNOTSUPP;
+ }
+
sreq->cmd_len = hdr->request_len;
if (sreq->cmd_len > BLK_MAX_CDB) {
sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
@@ -114,14 +119,10 @@ static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
hdr->response_len = len;
}
- if (rq->next_rq) {
- hdr->dout_resid = sreq->resid_len;
- hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
- } else if (rq_data_dir(rq) == READ) {
+ if (rq_data_dir(rq) == READ)
hdr->din_resid = sreq->resid_len;
- } else {
+ else
hdr->dout_resid = sreq->resid_len;
- }
return ret;
}
@@ -138,32 +139,35 @@ static const struct bsg_ops bsg_scsi_ops = {
.free_rq = bsg_scsi_free_rq,
};
-static struct request *
-bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
+static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg)
{
- struct request *rq, *next_rq = NULL;
+ struct request *rq;
+ struct bio *bio;
+ struct sg_io_v4 hdr;
int ret;
- if (!q->bsg_dev.class_dev)
- return ERR_PTR(-ENXIO);
+ if (copy_from_user(&hdr, uarg, sizeof(hdr)))
+ return -EFAULT;
- if (hdr->guard != 'Q')
- return ERR_PTR(-EINVAL);
+ if (!q->bsg_dev.class_dev)
+ return -ENXIO;
- ret = q->bsg_dev.ops->check_proto(hdr);
+ if (hdr.guard != 'Q')
+ return -EINVAL;
+ ret = q->bsg_dev.ops->check_proto(&hdr);
if (ret)
- return ERR_PTR(ret);
+ return ret;
- rq = blk_get_request(q, hdr->dout_xfer_len ?
+ rq = blk_get_request(q, hdr.dout_xfer_len ?
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq))
- return rq;
+ return PTR_ERR(rq);
-	ret = q->bsg_dev.ops->fill_hdr(rq, hdr, mode);
-	if (ret)
-		goto out;
+	ret = q->bsg_dev.ops->fill_hdr(rq, &hdr, mode);
+	if (ret) {
+		/*
+		 * rq came from blk_get_request() above, so drop that
+		 * reference here or a failed ->fill_hdr() leaks the request.
+		 * ->free_rq() is not called on this path because
+		 * bsg_transport_fill_hdr() already releases what it
+		 * allocated when it fails.
+		 */
+		blk_put_request(rq);
+		return ret;
+	}
- rq->timeout = msecs_to_jiffies(hdr->timeout);
+ rq->timeout = msecs_to_jiffies(hdr.timeout);
if (!rq->timeout)
rq->timeout = q->sg_timeout;
if (!rq->timeout)
@@ -171,68 +175,28 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
if (rq->timeout < BLK_MIN_SG_TIMEOUT)
rq->timeout = BLK_MIN_SG_TIMEOUT;
- if (hdr->dout_xfer_len && hdr->din_xfer_len) {
- if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
-
- pr_warn_once(
- "BIDI support in bsg has been deprecated and might be removed. "
- "Please report your use case to linux-scsi@vger.kernel.org\n");
-
- next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
- if (IS_ERR(next_rq)) {
- ret = PTR_ERR(next_rq);
- goto out;
- }
-
- rq->next_rq = next_rq;
- ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr->din_xferp),
- hdr->din_xfer_len, GFP_KERNEL);
- if (ret)
- goto out_free_nextrq;
- }
-
- if (hdr->dout_xfer_len) {
- ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->dout_xferp),
- hdr->dout_xfer_len, GFP_KERNEL);
- } else if (hdr->din_xfer_len) {
- ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp),
- hdr->din_xfer_len, GFP_KERNEL);
+ if (hdr.dout_xfer_len) {
+ ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr.dout_xferp),
+ hdr.dout_xfer_len, GFP_KERNEL);
+ } else if (hdr.din_xfer_len) {
+ ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr.din_xferp),
+ hdr.din_xfer_len, GFP_KERNEL);
}
if (ret)
- goto out_unmap_nextrq;
- return rq;
-
-out_unmap_nextrq:
- if (rq->next_rq)
- blk_rq_unmap_user(rq->next_rq->bio);
-out_free_nextrq:
- if (rq->next_rq)
- blk_put_request(rq->next_rq);
-out:
- q->bsg_dev.ops->free_rq(rq);
- blk_put_request(rq);
- return ERR_PTR(ret);
-}
+ goto out_free_rq;
-static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
- struct bio *bio, struct bio *bidi_bio)
-{
- int ret;
-
- ret = rq->q->bsg_dev.ops->complete_rq(rq, hdr);
-
- if (rq->next_rq) {
- blk_rq_unmap_user(bidi_bio);
- blk_put_request(rq->next_rq);
- }
+ bio = rq->bio;
+ blk_execute_rq(q, NULL, rq, !(hdr.flags & BSG_FLAG_Q_AT_TAIL));
+ ret = rq->q->bsg_dev.ops->complete_rq(rq, &hdr);
blk_rq_unmap_user(bio);
+
+out_free_rq:
rq->q->bsg_dev.ops->free_rq(rq);
blk_put_request(rq);
+ if (!ret && copy_to_user(uarg, &hdr, sizeof(hdr)))
+ return -EFAULT;
return ret;
}
@@ -367,31 +331,39 @@ static int bsg_release(struct inode *inode, struct file *file)
return bsg_put_device(bd);
}
+/*
+ * Copy bd->max_queue out to the user-space int at @uarg.
+ * Returns the result of put_user().
+ */
+static int bsg_get_command_q(struct bsg_device *bd, int __user *uarg)
+{
+	int ret = put_user(bd->max_queue, uarg);
+
+	return ret;
+}
+
+/*
+ * Read a new max_queue value from the user-space int at @uarg and store it
+ * in @bd.
+ *
+ * Returns -EFAULT if the value cannot be fetched, -EINVAL if it is smaller
+ * than 1, and 0 once bd->max_queue has been updated under bd->lock.
+ */
+static int bsg_set_command_q(struct bsg_device *bd, int __user *uarg)
+{
+	int depth;
+
+	if (get_user(depth, uarg))
+		return -EFAULT;
+
+	if (depth <= 0)
+		return -EINVAL;
+
+	spin_lock_irq(&bd->lock);
+	bd->max_queue = depth;
+	spin_unlock_irq(&bd->lock);
+
+	return 0;
+}
+
static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct bsg_device *bd = file->private_data;
- int __user *uarg = (int __user *) arg;
- int ret;
+ void __user *uarg = (void __user *) arg;
switch (cmd) {
- /*
- * our own ioctls
- */
+ /*
+ * Our own ioctls
+ */
case SG_GET_COMMAND_Q:
- return put_user(bd->max_queue, uarg);
- case SG_SET_COMMAND_Q: {
- int queue;
-
- if (get_user(queue, uarg))
- return -EFAULT;
- if (queue < 1)
- return -EINVAL;
-
- spin_lock_irq(&bd->lock);
- bd->max_queue = queue;
- spin_unlock_irq(&bd->lock);
- return 0;
- }
+ return bsg_get_command_q(bd, uarg);
+ case SG_SET_COMMAND_Q:
+ return bsg_set_command_q(bd, uarg);
/*
* SCSI/sg ioctls
@@ -404,36 +376,10 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case SG_GET_RESERVED_SIZE:
case SG_SET_RESERVED_SIZE:
case SG_EMULATED_HOST:
- case SCSI_IOCTL_SEND_COMMAND: {
- void __user *uarg = (void __user *) arg;
+ case SCSI_IOCTL_SEND_COMMAND:
return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg);
- }
- case SG_IO: {
- struct request *rq;
- struct bio *bio, *bidi_bio = NULL;
- struct sg_io_v4 hdr;
- int at_head;
-
- if (copy_from_user(&hdr, uarg, sizeof(hdr)))
- return -EFAULT;
-
- rq = bsg_map_hdr(bd->queue, &hdr, file->f_mode);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- bio = rq->bio;
- if (rq->next_rq)
- bidi_bio = rq->next_rq->bio;
-
- at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
- blk_execute_rq(bd->queue, NULL, rq, at_head);
- ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
-
- if (copy_to_user(uarg, &hdr, sizeof(hdr)))
- return -EFAULT;
-
- return ret;
- }
+ case SG_IO:
+ return bsg_sg_io(bd->queue, file->f_mode, uarg);
default:
return -ENOTTY;
}