aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-12-21 11:13:37 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-12-21 11:13:37 -0800
commit9035a8961b504d0997369509ab8c6b1f0a4ee33d (patch)
tree99aad197f1c60efe849185fff7a8f14a7792f748
parentMerge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm (diff)
parentRevert "bdi: add error handle for bdi_debug_register" (diff)
downloadlinux-dev-9035a8961b504d0997369509ab8c6b1f0a4ee33d.tar.xz
linux-dev-9035a8961b504d0997369509ab8c6b1f0a4ee33d.zip
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "It's been a few weeks, so here's a small collection of fixes that should go into the current series. This contains: - NVMe pull request from Christoph, with a few important fixes. - kyber hang fix from Omar. - A blk-throttl fix from Shaohua, fixing a case where we double charge a bio. - Two call_single_data alignment fixes from me, fixing up some unfortunate changes that went into 4.14 without being properly reviewed on the block side (since nobody was CC'ed on the patch...). - A bounce buffer fix in two parts, one from me and one from Ming. - Revert bdi debug error handling patch. It's causing boot issues for some folks, and a week down the line, we're still no closer to a fix. Revert this patch for now until it's figured out, then we can retry for 4.16" * 'for-linus' of git://git.kernel.dk/linux-block: Revert "bdi: add error handle for bdi_debug_register" null_blk: unalign call_single_data block: unalign call_single_data in struct request block-throttle: avoid double charge block: fix blk_rq_append_bio block: don't let passthrough IO go into .make_request_fn() nvme: setup streams after initializing namespace head nvme: check hw sectors before setting chunk sectors nvme: call blk_integrity_unregister after queue is cleaned up nvme-fc: remove double put reference if admin connect fails nvme: set discard_alignment to zero kyber: fix another domain token wait queue hang
-rw-r--r--block/bio.c2
-rw-r--r--block/blk-map.c38
-rw-r--r--block/blk-throttle.c8
-rw-r--r--block/bounce.c6
-rw-r--r--block/kyber-iosched.c37
-rw-r--r--drivers/block/null_blk.c4
-rw-r--r--drivers/nvme/host/core.c11
-rw-r--r--drivers/nvme/host/fc.c1
-rw-r--r--drivers/scsi/osd/osd_initiator.c4
-rw-r--r--drivers/target/target_core_pscsi.c4
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blk_types.h9
-rw-r--r--include/linux/blkdev.h25
-rw-r--r--mm/backing-dev.c5
14 files changed, 94 insertions, 62 deletions
diff --git a/block/bio.c b/block/bio.c
index 8bfdea58159b..9ef6cf3addb3 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
bio->bi_disk = bio_src->bi_disk;
bio->bi_partno = bio_src->bi_partno;
bio_set_flag(bio, BIO_CLONED);
+ if (bio_flagged(bio_src, BIO_THROTTLED))
+ bio_set_flag(bio, BIO_THROTTLED);
bio->bi_opf = bio_src->bi_opf;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter;
diff --git a/block/blk-map.c b/block/blk-map.c
index b21f8e86f120..d3a94719f03f 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -12,22 +12,29 @@
#include "blk.h"
/*
- * Append a bio to a passthrough request. Only works can be merged into
- * the request based on the driver constraints.
+ * Append a bio to a passthrough request. Only works if the bio can be merged
+ * into the request based on the driver constraints.
*/
-int blk_rq_append_bio(struct request *rq, struct bio *bio)
+int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
- blk_queue_bounce(rq->q, &bio);
+ struct bio *orig_bio = *bio;
+
+ blk_queue_bounce(rq->q, bio);
if (!rq->bio) {
- blk_rq_bio_prep(rq->q, rq, bio);
+ blk_rq_bio_prep(rq->q, rq, *bio);
} else {
- if (!ll_back_merge_fn(rq->q, rq, bio))
+ if (!ll_back_merge_fn(rq->q, rq, *bio)) {
+ if (orig_bio != *bio) {
+ bio_put(*bio);
+ *bio = orig_bio;
+ }
return -EINVAL;
+ }
- rq->biotail->bi_next = bio;
- rq->biotail = bio;
- rq->__data_len += bio->bi_iter.bi_size;
+ rq->biotail->bi_next = *bio;
+ rq->biotail = *bio;
+ rq->__data_len += (*bio)->bi_iter.bi_size;
}
return 0;
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
* We link the bounce buffer in and could have to traverse it
* later so we have to get a ref to prevent it from being freed
*/
- ret = blk_rq_append_bio(rq, bio);
- bio_get(bio);
+ ret = blk_rq_append_bio(rq, &bio);
if (ret) {
- bio_endio(bio);
__blk_rq_unmap_user(orig_bio);
- bio_put(bio);
return ret;
}
+ bio_get(bio);
return 0;
}
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
int reading = rq_data_dir(rq) == READ;
unsigned long addr = (unsigned long) kbuf;
int do_copy = 0;
- struct bio *bio;
+ struct bio *bio, *orig_bio;
int ret;
if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
if (do_copy)
rq->rq_flags |= RQF_COPY_USER;
- ret = blk_rq_append_bio(rq, bio);
+ orig_bio = bio;
+ ret = blk_rq_append_bio(rq, &bio);
if (unlikely(ret)) {
/* request is too big */
- bio_put(bio);
+ bio_put(orig_bio);
return ret;
}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 825bc29767e6..d19f416d6101 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2226,13 +2226,7 @@ again:
out_unlock:
spin_unlock_irq(q->queue_lock);
out:
- /*
- * As multiple blk-throtls may stack in the same issue path, we
- * don't want bios to leave with the flag set. Clear the flag if
- * being issued.
- */
- if (!throttled)
- bio_clear_flag(bio, BIO_THROTTLED);
+ bio_set_flag(bio, BIO_THROTTLED);
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
if (throttled || !td->track_bio_latency)
diff --git a/block/bounce.c b/block/bounce.c
index fceb1a96480b..1d05c422c932 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
unsigned i = 0;
bool bounce = false;
int sectors = 0;
+ bool passthrough = bio_is_passthrough(*bio_orig);
bio_for_each_segment(from, *bio_orig, iter) {
if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
if (!bounce)
return;
- if (sectors < bio_sectors(*bio_orig)) {
+ if (!passthrough && sectors < bio_sectors(*bio_orig)) {
bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
bio_chain(bio, *bio_orig);
generic_make_request(*bio_orig);
*bio_orig = bio;
}
- bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
+ bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+ bounce_bio_set);
bio_for_each_segment_all(to, bio, i) {
struct page *page = to->bv_page;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b4df317c2916..f95c60774ce8 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
unsigned int cur_domain;
unsigned int batching;
wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+ struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
atomic_t wait_index[KYBER_NUM_DOMAINS];
};
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
+ void *key);
+
static int rq_sched_domain(const struct request *rq)
{
unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
INIT_LIST_HEAD(&khd->rqs[i]);
+ init_waitqueue_func_entry(&khd->domain_wait[i],
+ kyber_domain_wake);
+ khd->domain_wait[i].private = hctx;
INIT_LIST_HEAD(&khd->domain_wait[i].entry);
atomic_set(&khd->wait_index[i], 0);
}
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
int nr;
nr = __sbitmap_queue_get(domain_tokens);
- if (nr >= 0)
- return nr;
/*
* If we failed to get a domain token, make sure the hardware queue is
* run when one becomes available. Note that this is serialized on
* khd->lock, but we still need to be careful about the waker.
*/
- if (list_empty_careful(&wait->entry)) {
- init_waitqueue_func_entry(wait, kyber_domain_wake);
- wait->private = hctx;
+ if (nr < 0 && list_empty_careful(&wait->entry)) {
ws = sbq_wait_ptr(domain_tokens,
&khd->wait_index[sched_domain]);
+ khd->domain_ws[sched_domain] = ws;
add_wait_queue(&ws->wait, wait);
/*
* Try again in case a token was freed before we got on the wait
- * queue. The waker may have already removed the entry from the
- * wait queue, but list_del_init() is okay with that.
+ * queue.
*/
nr = __sbitmap_queue_get(domain_tokens);
- if (nr >= 0) {
- unsigned long flags;
+ }
- spin_lock_irqsave(&ws->wait.lock, flags);
- list_del_init(&wait->entry);
- spin_unlock_irqrestore(&ws->wait.lock, flags);
- }
+ /*
+ * If we got a token while we were on the wait queue, remove ourselves
+ * from the wait queue to ensure that all wake ups make forward
+ * progress. It's possible that the waker already deleted the entry
+ * between the !list_empty_careful() check and us grabbing the lock, but
+ * list_del_init() is okay with that.
+ */
+ if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+ ws = khd->domain_ws[sched_domain];
+ spin_lock_irq(&ws->wait.lock);
+ list_del_init(&wait->entry);
+ spin_unlock_irq(&ws->wait.lock);
}
+
return nr;
}
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ccb9975a97fa..ad0477ae820f 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
struct nullb_cmd {
struct list_head list;
struct llist_node ll_list;
- call_single_data_t csd;
+ struct __call_single_data csd;
struct request *rq;
struct bio *bio;
unsigned int tag;
+ blk_status_t error;
struct nullb_queue *nq;
struct hrtimer timer;
- blk_status_t error;
};
struct nullb_queue {
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd4..1e46e60b8f10 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- queue->limits.discard_alignment = size;
+ queue->limits.discard_alignment = 0;
queue->limits.discard_granularity = size;
blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
- if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+ if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+ is_power_of_2(ctrl->max_hw_sectors))
blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
blk_queue_virt_boundary(q, ctrl->page_size - 1);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
nvme_set_queue_limits(ctrl, ns->queue);
- nvme_setup_streams_ns(ctrl, ns);
id = nvme_identify_ns(ctrl, nsid);
if (!id)
@@ -2880,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (nvme_init_ns_head(ns, nsid, id, &new))
goto out_free_id;
+ nvme_setup_streams_ns(ctrl, ns);
#ifdef CONFIG_NVME_MULTIPATH
/*
@@ -2965,8 +2966,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
return;
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
- if (blk_get_integrity(ns->disk))
- blk_integrity_unregister(ns->disk);
nvme_mpath_remove_disk_links(ns);
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_id_attr_group);
@@ -2974,6 +2973,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_nvm_unregister_sysfs(ns);
del_gendisk(ns->disk);
blk_cleanup_queue(ns->queue);
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
}
mutex_lock(&ns->ctrl->subsys->lock);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0a8af4daef89..794e66e4aa20 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
/* Remove core ctrl ref. */
nvme_put_ctrl(&ctrl->ctrl);
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index a4f28b7e4c65..e18877177f1b 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
return req;
for_each_bio(bio) {
- ret = blk_rq_append_bio(req, bio);
+ struct bio *bounce_bio = bio;
+
+ ret = blk_rq_append_bio(req, &bounce_bio);
if (ret)
return ERR_PTR(ret);
}
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c69b4a9694d..0d99b242e82e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
" %d i: %d bio: %p, allocating another"
" bio\n", bio->bi_vcnt, i, bio);
- rc = blk_rq_append_bio(req, bio);
+ rc = blk_rq_append_bio(req, &bio);
if (rc) {
pr_err("pSCSI: failed to append bio\n");
goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
}
if (bio) {
- rc = blk_rq_append_bio(req, bio);
+ rc = blk_rq_append_bio(req, &bio);
if (rc) {
pr_err("pSCSI: failed to append bio\n");
goto fail;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 82f0c8fd7be8..23d29b39f71e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
#define bio_set_dev(bio, bdev) \
do { \
+ if ((bio)->bi_disk != (bdev)->bd_disk) \
+ bio_clear_flag(bio, BIO_THROTTLED);\
(bio)->bi_disk = (bdev)->bd_disk; \
(bio)->bi_partno = (bdev)->bd_partno; \
} while (0)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a1e628e032da..9e7d8bd776d2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -50,8 +50,6 @@ struct blk_issue_stat {
struct bio {
struct bio *bi_next; /* request queue link */
struct gendisk *bi_disk;
- u8 bi_partno;
- blk_status_t bi_status;
unsigned int bi_opf; /* bottom bits req flags,
* top bits REQ_OP. Use
* accessors.
@@ -59,8 +57,8 @@ struct bio {
unsigned short bi_flags; /* status, etc and bvec pool number */
unsigned short bi_ioprio;
unsigned short bi_write_hint;
-
- struct bvec_iter bi_iter;
+ blk_status_t bi_status;
+ u8 bi_partno;
/* Number of segments in this BIO after
* physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
unsigned int bi_seg_front_size;
unsigned int bi_seg_back_size;
- atomic_t __bi_remaining;
+ struct bvec_iter bi_iter;
+ atomic_t __bi_remaining;
bio_end_io_t *bi_end_io;
void *bi_private;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8089ca17db9a..0ce8a372d506 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
struct request {
struct list_head queuelist;
union {
- call_single_data_t csd;
+ struct __call_single_data csd;
u64 fifo_time;
};
@@ -241,14 +241,24 @@ struct request {
struct request *next_rq;
};
+static inline bool blk_op_is_scsi(unsigned int op)
+{
+ return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
+}
+
+static inline bool blk_op_is_private(unsigned int op)
+{
+ return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
+}
+
static inline bool blk_rq_is_scsi(struct request *rq)
{
- return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
+ return blk_op_is_scsi(req_op(rq));
}
static inline bool blk_rq_is_private(struct request *rq)
{
- return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
+ return blk_op_is_private(req_op(rq));
}
static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
}
+static inline bool bio_is_passthrough(struct bio *bio)
+{
+ unsigned op = bio_op(bio);
+
+ return blk_op_is_scsi(op) || blk_op_is_private(op);
+}
+
static inline unsigned short req_get_ioprio(struct request *req)
{
return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
extern void blk_rq_unprep_clone(struct request *rq);
extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
-extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
+extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
extern void blk_delay_queue(struct request_queue *, unsigned long);
extern void blk_queue_split(struct request_queue *, struct bio **);
extern void blk_recount_segments(struct request_queue *, struct bio *);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 84b2dc76f140..b5f940ce0143 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
if (IS_ERR(dev))
return PTR_ERR(dev);
- if (bdi_debug_register(bdi, dev_name(dev))) {
- device_destroy(bdi_class, dev->devt);
- return -ENOMEM;
- }
cgwb_bdi_register(bdi);
bdi->dev = dev;
+ bdi_debug_register(bdi, dev_name(dev));
set_bit(WB_registered, &bdi->wb.state);
spin_lock_bh(&bdi_lock);