path: root/block
author	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-05 14:27:02 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-05 14:27:02 -0700
commit	3526dd0c7832f1011a0477cc6d903662bae05ea8 (patch)
tree	22fbac64eb40a0b29bfa4c029695f39b2f591e62 /block
parent	Merge tag 'edac_for_4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp (diff)
parent	blk-mq: Directly schedule q->timeout_work when aborting a request (diff)
Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
 "It's a pretty quiet round this time, which is nice. This contains:

  - series from Bart, cleaning up the way we set/test/clear atomic queue flags.
  - series from Bart, fixing races between gendisk and queue registration and removal.
  - set of bcache fixes and improvements from various folks, by way of Michael Lyle.
  - set of lightnvm updates from Matias, most of it being the 1.2 to 2.0 transition.
  - removal of unused DIO flags from Nikolay.
  - blk-mq/sbitmap memory ordering fixes from Omar.
  - divide-by-zero fix for BFQ from Paolo.
  - minor documentation patches from Randy.
  - timeout fix from Tejun.
  - Alpha "can't write a char atomically" fix from Mikulas.
  - set of NVMe fixes by way of Keith.
  - bsg and bsg-lib improvements from Christoph.
  - a few sed-opal fixes from Jonas.
  - cdrom check-disk-change deadlock fix from Maurizio.
  - various little fixes, comment fixes, etc from various folks"

* tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits)
  blk-mq: Directly schedule q->timeout_work when aborting a request
  blktrace: fix comment in blktrace_api.h
  lightnvm: remove function name in strings
  lightnvm: pblk: remove some unnecessary NULL checks
  lightnvm: pblk: don't recover unwritten lines
  lightnvm: pblk: implement 2.0 support
  lightnvm: pblk: implement get log report chunk
  lightnvm: pblk: rename ppaf* to addrf*
  lightnvm: pblk: check for supported version
  lightnvm: implement get log report chunk helpers
  lightnvm: make address conversions depend on generic device
  lightnvm: add support for 2.0 address format
  lightnvm: normalize geometry nomenclature
  lightnvm: complete geo structure with maxoc*
  lightnvm: add shorten OCSSD version in geo
  lightnvm: add minor version to generic geometry
  lightnvm: simplify geometry structure
  lightnvm: pblk: refactor init/exit sequences
  lightnvm: Avoid validation of default op value
  lightnvm: centralize permission check for lightnvm ioctl
  ...
Diffstat (limited to 'block')
-rw-r--r--	block/bfq-iosched.c	25
-rw-r--r--	block/bfq-iosched.h	2
-rw-r--r--	block/bio.c	4
-rw-r--r--	block/blk-cgroup.c	78
-rw-r--r--	block/blk-core.c	250
-rw-r--r--	block/blk-mq-debugfs.c	150
-rw-r--r--	block/blk-mq-pci.c	6
-rw-r--r--	block/blk-mq.c	20
-rw-r--r--	block/blk-settings.c	6
-rw-r--r--	block/blk-stat.c	6
-rw-r--r--	block/blk-sysfs.c	29
-rw-r--r--	block/blk-timeout.c	8
-rw-r--r--	block/blk-zoned.c	4
-rw-r--r--	block/blk.h	69
-rw-r--r--	block/bsg-lib.c	165
-rw-r--r--	block/bsg.c	262
-rw-r--r--	block/sed-opal.c	37
17 files changed, 695 insertions, 426 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index aeca22d91101..f0ecd98509d8 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -201,7 +201,20 @@ static struct kmem_cache *bfq_pool;
/* Target observation time interval for a peak-rate update (ns) */
#define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC
-/* Shift used for peak rate fixed precision calculations. */
+/*
+ * Shift used for peak-rate fixed precision calculations.
+ * With
+ * - the current shift: 16 positions
+ * - the current type used to store rate: u32
+ * - the current unit of measure for rate: [sectors/usec], or, more precisely,
+ * [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift,
+ * the range of rates that can be stored is
+ * [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec =
+ * [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec =
+ * [15, 65G] sectors/sec
+ * Which, assuming a sector size of 512B, corresponds to a range of
+ * [7.5K, 33T] B/sec
+ */
#define BFQ_RATE_SHIFT 16
/*
@@ -2637,6 +2650,16 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
rate /= divisor; /* smoothing constant alpha = 1/divisor */
bfqd->peak_rate += rate;
+
+ /*
+ * For a very slow device, bfqd->peak_rate can reach 0 (see
+ * the minimum representable values reported in the comments
+ * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid
+ * divisions by zero where bfqd->peak_rate is used as a
+ * divisor.
+ */
+ bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate);
+
update_thr_responsiveness_params(bfqd);
reset_computation:
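
The range quoted in the new BFQ_RATE_SHIFT comment can be sanity-checked with a small userspace program; this is an illustrative sketch only (not part of the patch) that simply redoes the arithmetic from the comment.

/* Illustrative check of the rate range quoted in the BFQ_RATE_SHIFT comment. */
#include <stdio.h>
#include <stdint.h>

#define BFQ_RATE_SHIFT 16

int main(void)
{
	/* Rates are stored as u32 in units of (sectors/usec) / 2^BFQ_RATE_SHIFT. */
	double min_rate = 1.0 / (1u << BFQ_RATE_SHIFT);			/* sectors/usec */
	double max_rate = (double)(UINT32_MAX >> BFQ_RATE_SHIFT);	/* sectors/usec */

	printf("min: ~%.0f sectors/s, ~%.1f KB/s\n",
	       min_rate * 1e6, min_rate * 1e6 * 512 / 1e3);
	printf("max: ~%.1fG sectors/s, ~%.1fT B/s\n",
	       max_rate * 1e6 / 1e9, max_rate * 1e6 * 512 / 1e12);
	return 0;
}

This prints roughly 15 sectors/s (7.8 KB/s) for the minimum and 65.5G sectors/s (33.6 TB/s) for the maximum, matching the [15, 65G] sectors/sec and [7.5K, 33T] B/sec figures in the comment.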
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 350c39ae2896..ae2f3dadec44 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -499,7 +499,7 @@ struct bfq_data {
u64 delta_from_first;
/*
* Current estimate of the device peak rate, measured in
- * [BFQ_RATE_SHIFT * sectors/usec]. The left-shift by
+ * [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by
* BFQ_RATE_SHIFT is performed to increase precision in
* fixed-point calculations.
*/
diff --git a/block/bio.c b/block/bio.c
index e1708db48258..53e0f0a1ed94 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -43,9 +43,9 @@
* break badly! cannot be bigger than what you can fit into an
* unsigned short
*/
-#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
+#define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n }
static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
- BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
+ BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max),
};
#undef BV
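
For context on the BV() change above: plain stringizing with # does not expand macro arguments, whereas the kernel's __stringify() does, so the new second argument lets the last slab be named "biovec-max" regardless of what BIO_MAX_PAGES expands to. A minimal userspace sketch (BIG, PLAIN and EXPAND are made-up stand-ins, not kernel names):

#include <stdio.h>

#define BIG 256			/* stand-in for a macro such as BIO_MAX_PAGES */
#define PLAIN(x) #x		/* stringize without expansion: "BIG" */
#define EXPAND_(x) #x
#define EXPAND(x) EXPAND_(x)	/* behaves like __stringify(): "256" */

int main(void)
{
	printf("%s %s\n", PLAIN(BIG), EXPAND(BIG));	/* prints: BIG 256 */
	return 0;
}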
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c2033a232a44..1c16694ae145 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -307,11 +307,28 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
}
}
+static void blkg_pd_offline(struct blkcg_gq *blkg)
+{
+ int i;
+
+ lockdep_assert_held(blkg->q->queue_lock);
+ lockdep_assert_held(&blkg->blkcg->lock);
+
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+
+ if (blkg->pd[i] && !blkg->pd[i]->offline &&
+ pol->pd_offline_fn) {
+ pol->pd_offline_fn(blkg->pd[i]);
+ blkg->pd[i]->offline = true;
+ }
+ }
+}
+
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
struct blkcg_gq *parent = blkg->parent;
- int i;
lockdep_assert_held(blkg->q->queue_lock);
lockdep_assert_held(&blkcg->lock);
@@ -320,13 +337,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
WARN_ON_ONCE(list_empty(&blkg->q_node));
WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
-
- if (blkg->pd[i] && pol->pd_offline_fn)
- pol->pd_offline_fn(blkg->pd[i]);
- }
-
if (parent) {
blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
@@ -369,6 +379,7 @@ static void blkg_destroy_all(struct request_queue *q)
struct blkcg *blkcg = blkg->blkcg;
spin_lock(&blkcg->lock);
+ blkg_pd_offline(blkg);
blkg_destroy(blkg);
spin_unlock(&blkcg->lock);
}
@@ -995,25 +1006,25 @@ static struct cftype blkcg_legacy_files[] = {
* @css: css of interest
*
* This function is called when @css is about to go away and responsible
- * for shooting down all blkgs associated with @css. blkgs should be
- * removed while holding both q and blkcg locks. As blkcg lock is nested
- * inside q lock, this function performs reverse double lock dancing.
+ * for offlining all blkgs pd and killing all wbs associated with @css.
+ * blkgs pd offline should be done while holding both q and blkcg locks.
+ * As blkcg lock is nested inside q lock, this function performs reverse
+ * double lock dancing.
*
* This is the blkcg counterpart of ioc_release_fn().
*/
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
+ struct blkcg_gq *blkg;
spin_lock_irq(&blkcg->lock);
- while (!hlist_empty(&blkcg->blkg_list)) {
- struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
- struct blkcg_gq, blkcg_node);
+ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
struct request_queue *q = blkg->q;
if (spin_trylock(q->queue_lock)) {
- blkg_destroy(blkg);
+ blkg_pd_offline(blkg);
spin_unlock(q->queue_lock);
} else {
spin_unlock_irq(&blkcg->lock);
@@ -1027,11 +1038,43 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
wb_blkcg_offline(blkcg);
}
+/**
+ * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * This function is called when blkcg css is about to free and responsible for
+ * destroying all blkgs associated with @blkcg.
+ * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
+ * is nested inside q lock, this function performs reverse double lock dancing.
+ */
+static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
+{
+ spin_lock_irq(&blkcg->lock);
+ while (!hlist_empty(&blkcg->blkg_list)) {
+ struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
+ struct blkcg_gq,
+ blkcg_node);
+ struct request_queue *q = blkg->q;
+
+ if (spin_trylock(q->queue_lock)) {
+ blkg_destroy(blkg);
+ spin_unlock(q->queue_lock);
+ } else {
+ spin_unlock_irq(&blkcg->lock);
+ cpu_relax();
+ spin_lock_irq(&blkcg->lock);
+ }
+ }
+ spin_unlock_irq(&blkcg->lock);
+}
+
static void blkcg_css_free(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
int i;
+ blkcg_destroy_all_blkgs(blkcg);
+
mutex_lock(&blkcg_pol_mutex);
list_del(&blkcg->all_blkcgs_node);
@@ -1371,8 +1414,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
spin_lock(&blkg->blkcg->lock);
if (blkg->pd[pol->plid]) {
- if (pol->pd_offline_fn)
+ if (!blkg->pd[pol->plid]->offline &&
+ pol->pd_offline_fn) {
pol->pd_offline_fn(blkg->pd[pol->plid]);
+ blkg->pd[pol->plid]->offline = true;
+ }
pol->pd_free_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid] = NULL;
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 6d82c4f7fadd..abcb8684ba67 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -71,6 +71,78 @@ struct kmem_cache *blk_requestq_cachep;
*/
static struct workqueue_struct *kblockd_workqueue;
+/**
+ * blk_queue_flag_set - atomically set a queue flag
+ * @flag: flag to be set
+ * @q: request queue
+ */
+void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_set(flag, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL(blk_queue_flag_set);
+
+/**
+ * blk_queue_flag_clear - atomically clear a queue flag
+ * @flag: flag to be cleared
+ * @q: request queue
+ */
+void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_clear(flag, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL(blk_queue_flag_clear);
+
+/**
+ * blk_queue_flag_test_and_set - atomically test and set a queue flag
+ * @flag: flag to be set
+ * @q: request queue
+ *
+ * Returns the previous value of @flag - 0 if the flag was not set and 1 if
+ * the flag was already set.
+ */
+bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
+{
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ res = queue_flag_test_and_set(flag, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
+
+/**
+ * blk_queue_flag_test_and_clear - atomically test and clear a queue flag
+ * @flag: flag to be cleared
+ * @q: request queue
+ *
+ * Returns the previous value of @flag - 0 if the flag was not set and 1 if
+ * the flag was set.
+ */
+bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q)
+{
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ res = queue_flag_test_and_clear(flag, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear);
+
static void blk_clear_congested(struct request_list *rl, int sync)
{
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -361,25 +433,14 @@ EXPORT_SYMBOL(blk_sync_queue);
*/
int blk_set_preempt_only(struct request_queue *q)
{
- unsigned long flags;
- int res;
-
- spin_lock_irqsave(q->queue_lock, flags);
- res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- return res;
+ return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
}
EXPORT_SYMBOL_GPL(blk_set_preempt_only);
void blk_clear_preempt_only(struct request_queue *q)
{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
+ blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
wake_up_all(&q->mq_freeze_wq);
- spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
@@ -629,9 +690,7 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
void blk_set_queue_dying(struct request_queue *q)
{
- spin_lock_irq(q->queue_lock);
- queue_flag_set(QUEUE_FLAG_DYING, q);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
/*
* When queue DYING flag is set, we need to block new req
@@ -719,6 +778,37 @@ void blk_cleanup_queue(struct request_queue *q)
del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
blk_sync_queue(q);
+ /*
+ * I/O scheduler exit is only safe after the sysfs scheduler attribute
+ * has been removed.
+ */
+ WARN_ON_ONCE(q->kobj.state_in_sysfs);
+
+ /*
+ * Since the I/O scheduler exit code may access cgroup information,
+ * perform I/O scheduler exit before disassociating from the block
+ * cgroup controller.
+ */
+ if (q->elevator) {
+ ioc_clear_queue(q);
+ elevator_exit(q, q->elevator);
+ q->elevator = NULL;
+ }
+
+ /*
+ * Remove all references to @q from the block cgroup controller before
+ * restoring @q->queue_lock to avoid that restoring this pointer causes
+ * e.g. blkcg_print_blkgs() to crash.
+ */
+ blkcg_exit_queue(q);
+
+ /*
+ * Since the cgroup code may dereference the @q->backing_dev_info
+ * pointer, only decrease its reference count after having removed the
+ * association with the block cgroup controller.
+ */
+ bdi_put(q->backing_dev_info);
+
if (q->mq_ops)
blk_mq_free_queue(q);
percpu_ref_exit(&q->q_usage_counter);
@@ -810,7 +900,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl)
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
- return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
+ return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
}
EXPORT_SYMBOL(blk_alloc_queue);
@@ -827,7 +917,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
bool success = false;
int ret;
- rcu_read_lock_sched();
+ rcu_read_lock();
if (percpu_ref_tryget_live(&q->q_usage_counter)) {
/*
* The code that sets the PREEMPT_ONLY flag is
@@ -840,7 +930,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
percpu_ref_put(&q->q_usage_counter);
}
}
- rcu_read_unlock_sched();
+ rcu_read_unlock();
if (success)
return 0;
@@ -888,7 +978,21 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
kblockd_schedule_work(&q->timeout_work);
}
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
+/**
+ * blk_alloc_queue_node - allocate a request queue
+ * @gfp_mask: memory allocation flags
+ * @node_id: NUMA node to allocate memory from
+ * @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
+ * serialize calls to the legacy .request_fn() callback. Ignored for
+ * blk-mq request queues.
+ *
+ * Note: pass the queue lock as the third argument to this function instead of
+ * setting the queue lock pointer explicitly to avoid triggering a sporadic
+ * crash in the blkcg code. This function namely calls blkcg_init_queue() and
+ * the queue lock pointer must be set before blkcg_init_queue() is called.
+ */
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
+ spinlock_t *lock)
{
struct request_queue *q;
@@ -939,11 +1043,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
mutex_init(&q->sysfs_lock);
spin_lock_init(&q->__queue_lock);
- /*
- * By default initialize queue_lock to internal lock and driver can
- * override it later if need be.
- */
- q->queue_lock = &q->__queue_lock;
+ if (!q->mq_ops)
+ q->queue_lock = lock ? : &q->__queue_lock;
/*
* A queue starts its life with bypass turned on to avoid
@@ -952,7 +1053,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
* registered by blk_register_queue().
*/
q->bypass_depth = 1;
- __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
+ queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
init_waitqueue_head(&q->mq_freeze_wq);
@@ -1030,13 +1131,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
struct request_queue *q;
- q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+ q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
if (!q)
return NULL;
q->request_fn = rfn;
- if (lock)
- q->queue_lock = lock;
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
return NULL;
@@ -2023,7 +2122,7 @@ out_unlock:
return BLK_QC_T_NONE;
}
-static void handle_bad_sector(struct bio *bio)
+static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
char b[BDEVNAME_SIZE];
@@ -2031,7 +2130,7 @@ static void handle_bad_sector(struct bio *bio)
printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
bio_devname(bio, b), bio->bi_opf,
(unsigned long long)bio_end_sector(bio),
- (long long)get_capacity(bio->bi_disk));
+ (long long)maxsector);
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -2093,67 +2192,58 @@ static noinline int should_fail_bio(struct bio *bio)
ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
/*
+ * Check whether this bio extends beyond the end of the device or partition.
+ * This may well happen - the kernel calls bread() without checking the size of
+ * the device, e.g., when mounting a file system.
+ */
+static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
+{
+ unsigned int nr_sectors = bio_sectors(bio);
+
+ if (nr_sectors && maxsector &&
+ (nr_sectors > maxsector ||
+ bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
+ handle_bad_sector(bio, maxsector);
+ return -EIO;
+ }
+ return 0;
+}
+
+/*
* Remap block n of partition p to block n+start(p) of the disk.
*/
static inline int blk_partition_remap(struct bio *bio)
{
struct hd_struct *p;
- int ret = 0;
+ int ret = -EIO;
rcu_read_lock();
p = __disk_get_part(bio->bi_disk, bio->bi_partno);
- if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) ||
- bio_check_ro(bio, p))) {
- ret = -EIO;
+ if (unlikely(!p))
+ goto out;
+ if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
+ goto out;
+ if (unlikely(bio_check_ro(bio, p)))
goto out;
- }
/*
* Zone reset does not include bi_size so bio_sectors() is always 0.
* Include a test for the reset op code and perform the remap if needed.
*/
- if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET)
- goto out;
-
- bio->bi_iter.bi_sector += p->start_sect;
- bio->bi_partno = 0;
- trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
- bio->bi_iter.bi_sector - p->start_sect);
-
+ if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+ if (bio_check_eod(bio, part_nr_sects_read(p)))
+ goto out;
+ bio->bi_iter.bi_sector += p->start_sect;
+ bio->bi_partno = 0;
+ trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
+ bio->bi_iter.bi_sector - p->start_sect);
+ }
+ ret = 0;
out:
rcu_read_unlock();
return ret;
}
-/*
- * Check whether this bio extends beyond the end of the device.
- */
-static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
-{
- sector_t maxsector;
-
- if (!nr_sectors)
- return 0;
-
- /* Test device or partition size, when known. */
- maxsector = get_capacity(bio->bi_disk);
- if (maxsector) {
- sector_t sector = bio->bi_iter.bi_sector;
-
- if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
- /*
- * This may well happen - the kernel calls bread()
- * without checking the size of the device, e.g., when
- * mounting a device.
- */
- handle_bad_sector(bio);
- return 1;
- }
- }
-
- return 0;
-}
-
static noinline_for_stack bool
generic_make_request_checks(struct bio *bio)
{
@@ -2164,9 +2254,6 @@ generic_make_request_checks(struct bio *bio)
might_sleep();
- if (bio_check_eod(bio, nr_sectors))
- goto end_io;
-
q = bio->bi_disk->queue;
if (unlikely(!q)) {
printk(KERN_ERR
@@ -2186,17 +2273,16 @@ generic_make_request_checks(struct bio *bio)
if (should_fail_bio(bio))
goto end_io;
- if (!bio->bi_partno) {
- if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+ if (bio->bi_partno) {
+ if (unlikely(blk_partition_remap(bio)))
goto end_io;
} else {
- if (blk_partition_remap(bio))
+ if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+ goto end_io;
+ if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
goto end_io;
}
- if (bio_check_eod(bio, nr_sectors))
- goto end_io;
-
/*
* Filter flush bio's early so that make_request based
* drivers without flush support don't have to worry
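
The blk_queue_flag_*() helpers added above take q->queue_lock internally, so callers no longer open-code the irqsave/irqrestore dance. A minimal illustrative sketch of the calling pattern (example_mark_nonrot() is hypothetical; QUEUE_FLAG_NONROT is used purely as an example flag):

#include <linux/blkdev.h>

/* Hypothetical driver helper: toggle a queue flag via the new API. */
static void example_mark_nonrot(struct request_queue *q, bool nonrot)
{
	if (nonrot)
		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
}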
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 21cbc1f071c6..58b3b79cbe83 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -24,6 +24,64 @@
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
+static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
+{
+ if (stat->nr_samples) {
+ seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
+ stat->nr_samples, stat->mean, stat->min, stat->max);
+ } else {
+ seq_puts(m, "samples=0");
+ }
+}
+
+static int queue_poll_stat_show(void *data, struct seq_file *m)
+{
+ struct request_queue *q = data;
+ int bucket;
+
+ for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
+ seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket));
+ print_stat(m, &q->poll_stat[2*bucket]);
+ seq_puts(m, "\n");
+
+ seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
+ print_stat(m, &q->poll_stat[2*bucket+1]);
+ seq_puts(m, "\n");
+ }
+ return 0;
+}
+
+static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
+ __acquires(&q->requeue_lock)
+{
+ struct request_queue *q = m->private;
+
+ spin_lock_irq(&q->requeue_lock);
+ return seq_list_start(&q->requeue_list, *pos);
+}
+
+static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct request_queue *q = m->private;
+
+ return seq_list_next(v, &q->requeue_list, pos);
+}
+
+static void queue_requeue_list_stop(struct seq_file *m, void *v)
+ __releases(&q->requeue_lock)
+{
+ struct request_queue *q = m->private;
+
+ spin_unlock_irq(&q->requeue_lock);
+}
+
+static const struct seq_operations queue_requeue_list_seq_ops = {
+ .start = queue_requeue_list_start,
+ .next = queue_requeue_list_next,
+ .stop = queue_requeue_list_stop,
+ .show = blk_mq_debugfs_rq_show,
+};
+
static int blk_flags_show(struct seq_file *m, const unsigned long flags,
const char *const *flag_name, int flag_name_count)
{
@@ -125,16 +183,6 @@ inval:
return count;
}
-static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
-{
- if (stat->nr_samples) {
- seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
- stat->nr_samples, stat->mean, stat->min, stat->max);
- } else {
- seq_puts(m, "samples=0");
- }
-}
-
static int queue_write_hint_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
@@ -158,23 +206,30 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
return count;
}
-static int queue_poll_stat_show(void *data, struct seq_file *m)
+static int queue_zone_wlock_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
- int bucket;
+ unsigned int i;
- for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
- seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket));
- print_stat(m, &q->poll_stat[2*bucket]);
- seq_puts(m, "\n");
+ if (!q->seq_zones_wlock)
+ return 0;
+
+ for (i = 0; i < blk_queue_nr_zones(q); i++)
+ if (test_bit(i, q->seq_zones_wlock))
+ seq_printf(m, "%u\n", i);
- seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
- print_stat(m, &q->poll_stat[2*bucket+1]);
- seq_puts(m, "\n");
- }
return 0;
}
+static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
+ { "poll_stat", 0400, queue_poll_stat_show },
+ { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
+ { "state", 0600, queue_state_show, queue_state_write },
+ { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
+ { "zone_wlock", 0400, queue_zone_wlock_show, NULL },
+ { },
+};
+
#define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name
static const char *const hctx_state_name[] = {
HCTX_STATE_NAME(STOPPED),
@@ -295,6 +350,20 @@ static const char *const rqf_name[] = {
};
#undef RQF_NAME
+static const char *const blk_mq_rq_state_name_array[] = {
+ [MQ_RQ_IDLE] = "idle",
+ [MQ_RQ_IN_FLIGHT] = "in_flight",
+ [MQ_RQ_COMPLETE] = "complete",
+};
+
+static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
+{
+ if (WARN_ON_ONCE((unsigned int)rq_state >
+ ARRAY_SIZE(blk_mq_rq_state_name_array)))
+ return "(?)";
+ return blk_mq_rq_state_name_array[rq_state];
+}
+
int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
{
const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -311,7 +380,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
seq_puts(m, ", .rq_flags=");
blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
ARRAY_SIZE(rqf_name));
- seq_printf(m, ", complete=%d", blk_rq_is_complete(rq));
+ seq_printf(m, ", .state=%s", blk_mq_rq_state_name(blk_mq_rq_state(rq)));
seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
rq->internal_tag);
if (mq_ops->show_rq)
@@ -327,37 +396,6 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
}
EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
-static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
- __acquires(&q->requeue_lock)
-{
- struct request_queue *q = m->private;
-
- spin_lock_irq(&q->requeue_lock);
- return seq_list_start(&q->requeue_list, *pos);
-}
-
-static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
-{
- struct request_queue *q = m->private;
-
- return seq_list_next(v, &q->requeue_list, pos);
-}
-
-static void queue_requeue_list_stop(struct seq_file *m, void *v)
- __releases(&q->requeue_lock)
-{
- struct request_queue *q = m->private;
-
- spin_unlock_irq(&q->requeue_lock);
-}
-
-static const struct seq_operations queue_requeue_list_seq_ops = {
- .start = queue_requeue_list_start,
- .next = queue_requeue_list_next,
- .stop = queue_requeue_list_stop,
- .show = blk_mq_debugfs_rq_show,
-};
-
static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
__acquires(&hctx->lock)
{
@@ -747,14 +785,6 @@ static const struct file_operations blk_mq_debugfs_fops = {
.release = blk_mq_debugfs_release,
};
-static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
- {"poll_stat", 0400, queue_poll_stat_show},
- {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
- {"state", 0600, queue_state_show, queue_state_write},
- {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
- {},
-};
-
static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
{"state", 0400, hctx_state_show},
{"flags", 0400, hctx_flags_show},
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index 76944e3271bf..e233996bb76f 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -21,6 +21,7 @@
* blk_mq_pci_map_queues - provide a default queue mapping for PCI device
* @set: tagset to provide the mapping for
* @pdev: PCI device associated with @set.
+ * @offset: Offset to use for the pci irq vector
*
* This function assumes the PCI device @pdev has at least as many available
* interrupt vectors as @set has queues. It will then query the vector
@@ -28,13 +29,14 @@
* that maps a queue to the CPUs that have irq affinity for the corresponding
* vector.
*/
-int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
+int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
+ int offset)
{
const struct cpumask *mask;
unsigned int queue, cpu;
for (queue = 0; queue < set->nr_hw_queues; queue++) {
- mask = pci_irq_get_affinity(pdev, queue);
+ mask = pci_irq_get_affinity(pdev, queue + offset);
if (!mask)
goto fallback;
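
With the new @offset argument, a driver whose first interrupt vector(s) are not used for blk-mq queues (for example, a dedicated admin interrupt) can skip them when building the mapping. An illustrative sketch, where example_map_queues() and the offset of 1 are hypothetical:

#include <linux/blk-mq-pci.h>
#include <linux/pci.h>

/* Hypothetical: vector 0 is reserved, hardware queues start at vector 1. */
static int example_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
{
	return blk_mq_pci_map_queues(set, pdev, 1);
}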
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 16e83e6df404..f5c7dbcb954f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -194,11 +194,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
*/
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- queue_flag_set(QUEUE_FLAG_QUIESCED, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
@@ -239,11 +235,7 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
*/
void blk_mq_unquiesce_queue(struct request_queue *q)
{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
/* dispatch requests which are inserted during quiescing */
blk_mq_run_hw_queues(q, true);
@@ -986,9 +978,9 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
struct blk_mq_hw_ctx *hctx = flush_data->hctx;
struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
- sbitmap_clear_bit(sb, bitnr);
spin_lock(&ctx->lock);
list_splice_tail_init(&ctx->rq_list, flush_data->list);
+ sbitmap_clear_bit(sb, bitnr);
spin_unlock(&ctx->lock);
return true;
}
@@ -2556,7 +2548,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
struct request_queue *uninit_q, *q;
- uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+ uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);
if (!uninit_q)
return ERR_PTR(-ENOMEM);
@@ -2678,7 +2670,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
if (!(set->flags & BLK_MQ_F_SG_MERGE))
- q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE;
+ queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
q->sg_reserved_size = INT_MAX;
@@ -3005,7 +2997,7 @@ EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
static bool blk_poll_stats_enable(struct request_queue *q)
{
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
- test_and_set_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
+ blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
return true;
blk_stat_add_callback(q, q->poll_cb);
return false;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 48ebe6be07b7..d1de71124656 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -859,12 +859,10 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment);
void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
{
- spin_lock_irq(q->queue_lock);
if (queueable)
- clear_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags);
+ blk_queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q);
else
- set_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q);
}
EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 28003bf9941c..bd365a95fcf8 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -152,7 +152,7 @@ void blk_stat_add_callback(struct request_queue *q,
spin_lock(&q->stats->lock);
list_add_tail_rcu(&cb->list, &q->stats->callbacks);
- set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_STATS, q);
spin_unlock(&q->stats->lock);
}
EXPORT_SYMBOL_GPL(blk_stat_add_callback);
@@ -163,7 +163,7 @@ void blk_stat_remove_callback(struct request_queue *q,
spin_lock(&q->stats->lock);
list_del_rcu(&cb->list);
if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
- clear_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+ blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
spin_unlock(&q->stats->lock);
del_timer_sync(&cb->timer);
@@ -191,7 +191,7 @@ void blk_stat_enable_accounting(struct request_queue *q)
{
spin_lock(&q->stats->lock);
q->stats->enable_accounting = true;
- set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_STATS, q);
spin_unlock(&q->stats->lock);
}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cbea895a5547..d00d1b0ec109 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -276,12 +276,10 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \
if (neg) \
val = !val; \
\
- spin_lock_irq(q->queue_lock); \
if (val) \
- queue_flag_set(QUEUE_FLAG_##flag, q); \
+ blk_queue_flag_set(QUEUE_FLAG_##flag, q); \
else \
- queue_flag_clear(QUEUE_FLAG_##flag, q); \
- spin_unlock_irq(q->queue_lock); \
+ blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \
return ret; \
}
@@ -414,12 +412,10 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
if (ret < 0)
return ret;
- spin_lock_irq(q->queue_lock);
if (poll_on)
- queue_flag_set(QUEUE_FLAG_POLL, q);
+ blk_queue_flag_set(QUEUE_FLAG_POLL, q);
else
- queue_flag_clear(QUEUE_FLAG_POLL, q);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
return ret;
}
@@ -487,12 +483,10 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
if (set == -1)
return -EINVAL;
- spin_lock_irq(q->queue_lock);
if (set)
- queue_flag_set(QUEUE_FLAG_WC, q);
+ blk_queue_flag_set(QUEUE_FLAG_WC, q);
else
- queue_flag_clear(QUEUE_FLAG_WC, q);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_clear(QUEUE_FLAG_WC, q);
return count;
}
@@ -798,13 +792,6 @@ static void __blk_release_queue(struct work_struct *work)
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
blk_stat_remove_callback(q, q->poll_cb);
blk_stat_free_callback(q->poll_cb);
- bdi_put(q->backing_dev_info);
- blkcg_exit_queue(q);
-
- if (q->elevator) {
- ioc_clear_queue(q);
- elevator_exit(q, q->elevator);
- }
blk_free_queue_stats(q->stats);
@@ -953,9 +940,7 @@ void blk_unregister_queue(struct gendisk *disk)
*/
mutex_lock(&q->sysfs_lock);
- spin_lock_irq(q->queue_lock);
- queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
/*
* Remove the sysfs attributes before unregistering the queue data
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index a05e3676d24a..652d4d4d3e97 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -57,12 +57,10 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
char *p = (char *) buf;
val = simple_strtoul(p, &p, 10);
- spin_lock_irq(q->queue_lock);
if (val)
- queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
+ blk_queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
else
- queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
}
return count;
@@ -165,7 +163,7 @@ void blk_abort_request(struct request *req)
* No need for fancy synchronizations.
*/
blk_rq_set_deadline(req, jiffies);
- mod_timer(&req->q->timeout, 0);
+ kblockd_schedule_work(&req->q->timeout_work);
} else {
if (blk_mark_rq_complete(req))
return;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index acb7252c7e81..08e84ef2bc05 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -296,7 +296,7 @@ int blkdev_reset_zones(struct block_device *bdev,
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
-/**
+/*
* BLKREPORTZONE ioctl processing.
* Called from blkdev_ioctl.
*/
@@ -355,7 +355,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
return ret;
}
-/**
+/*
* BLKRESETZONE ioctl processing.
* Called from blkdev_ioctl.
*/
diff --git a/block/blk.h b/block/blk.h
index 46db5dc83dcb..b034fd2460c4 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -41,6 +41,75 @@ extern struct kmem_cache *request_cachep;
extern struct kobj_type blk_queue_ktype;
extern struct ida blk_queue_ida;
+/*
+ * @q->queue_lock is set while a queue is being initialized. Since we know
+ * that no other threads access the queue object before @q->queue_lock has
+ * been set, it is safe to manipulate queue flags without holding the
+ * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
+ * blk_init_allocated_queue().
+ */
+static inline void queue_lockdep_assert_held(struct request_queue *q)
+{
+ if (q->queue_lock)
+ lockdep_assert_held(q->queue_lock);
+}
+
+static inline void queue_flag_set_unlocked(unsigned int flag,
+ struct request_queue *q)
+{
+ if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) &&
+ kref_read(&q->kobj.kref))
+ lockdep_assert_held(q->queue_lock);
+ __set_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_clear_unlocked(unsigned int flag,
+ struct request_queue *q)
+{
+ if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) &&
+ kref_read(&q->kobj.kref))
+ lockdep_assert_held(q->queue_lock);
+ __clear_bit(flag, &q->queue_flags);
+}
+
+static inline int queue_flag_test_and_clear(unsigned int flag,
+ struct request_queue *q)
+{
+ queue_lockdep_assert_held(q);
+
+ if (test_bit(flag, &q->queue_flags)) {
+ __clear_bit(flag, &q->queue_flags);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int queue_flag_test_and_set(unsigned int flag,
+ struct request_queue *q)
+{
+ queue_lockdep_assert_held(q);
+
+ if (!test_bit(flag, &q->queue_flags)) {
+ __set_bit(flag, &q->queue_flags);
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
+{
+ queue_lockdep_assert_held(q);
+ __set_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
+{
+ queue_lockdep_assert_held(q);
+ __clear_bit(flag, &q->queue_flags);
+}
+
static inline struct blk_flush_queue *blk_get_flush_queue(
struct request_queue *q, struct blk_mq_ctx *ctx)
{
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 1474153f73e3..fc2e5ff2c4b9 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -27,6 +27,94 @@
#include <linux/bsg-lib.h>
#include <linux/export.h>
#include <scsi/scsi_cmnd.h>
+#include <scsi/sg.h>
+
+#define uptr64(val) ((void __user *)(uintptr_t)(val))
+
+static int bsg_transport_check_proto(struct sg_io_v4 *hdr)
+{
+ if (hdr->protocol != BSG_PROTOCOL_SCSI ||
+ hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_TRANSPORT)
+ return -EINVAL;
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ return 0;
+}
+
+static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
+ fmode_t mode)
+{
+ struct bsg_job *job = blk_mq_rq_to_pdu(rq);
+
+ job->request_len = hdr->request_len;
+ job->request = memdup_user(uptr64(hdr->request), hdr->request_len);
+ if (IS_ERR(job->request))
+ return PTR_ERR(job->request);
+ return 0;
+}
+
+static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
+{
+ struct bsg_job *job = blk_mq_rq_to_pdu(rq);
+ int ret = 0;
+
+ /*
+ * The assignments below don't make much sense, but are kept for
+ * bug by bug backwards compatibility:
+ */
+ hdr->device_status = job->result & 0xff;
+ hdr->transport_status = host_byte(job->result);
+ hdr->driver_status = driver_byte(job->result);
+ hdr->info = 0;
+ if (hdr->device_status || hdr->transport_status || hdr->driver_status)
+ hdr->info |= SG_INFO_CHECK;
+ hdr->response_len = 0;
+
+ if (job->result < 0) {
+ /* we're only returning the result field in the reply */
+ job->reply_len = sizeof(u32);
+ ret = job->result;
+ }
+
+ if (job->reply_len && hdr->response) {
+ int len = min(hdr->max_response_len, job->reply_len);
+
+ if (copy_to_user(uptr64(hdr->response), job->reply, len))
+ ret = -EFAULT;
+ else
+ hdr->response_len = len;
+ }
+
+ /* we assume all request payload was transferred, residual == 0 */
+ hdr->dout_resid = 0;
+
+ if (rq->next_rq) {
+ unsigned int rsp_len = job->reply_payload.payload_len;
+
+ if (WARN_ON(job->reply_payload_rcv_len > rsp_len))
+ hdr->din_resid = 0;
+ else
+ hdr->din_resid = rsp_len - job->reply_payload_rcv_len;
+ } else {
+ hdr->din_resid = 0;
+ }
+
+ return ret;
+}
+
+static void bsg_transport_free_rq(struct request *rq)
+{
+ struct bsg_job *job = blk_mq_rq_to_pdu(rq);
+
+ kfree(job->request);
+}
+
+static const struct bsg_ops bsg_transport_ops = {
+ .check_proto = bsg_transport_check_proto,
+ .fill_hdr = bsg_transport_fill_hdr,
+ .complete_rq = bsg_transport_complete_rq,
+ .free_rq = bsg_transport_free_rq,
+};
/**
* bsg_teardown_job - routine to teardown a bsg job
@@ -35,7 +123,7 @@
static void bsg_teardown_job(struct kref *kref)
{
struct bsg_job *job = container_of(kref, struct bsg_job, kref);
- struct request *rq = job->req;
+ struct request *rq = blk_mq_rq_from_pdu(job);
put_device(job->dev); /* release reference for the request */
@@ -68,28 +156,9 @@ EXPORT_SYMBOL_GPL(bsg_job_get);
void bsg_job_done(struct bsg_job *job, int result,
unsigned int reply_payload_rcv_len)
{
- struct request *req = job->req;
- struct request *rsp = req->next_rq;
- struct scsi_request *rq = scsi_req(req);
- int err;
-
- err = scsi_req(job->req)->result = result;
- if (err < 0)
- /* we're only returning the result field in the reply */
- rq->sense_len = sizeof(u32);
- else
- rq->sense_len = job->reply_len;
- /* we assume all request payload was transferred, residual == 0 */
- rq->resid_len = 0;
-
- if (rsp) {
- WARN_ON(reply_payload_rcv_len > scsi_req(rsp)->resid_len);
-
- /* set reply (bidi) residual */
- scsi_req(rsp)->resid_len -=
- min(reply_payload_rcv_len, scsi_req(rsp)->resid_len);
- }
- blk_complete_request(req);
+ job->result = result;
+ job->reply_payload_rcv_len = reply_payload_rcv_len;
+ blk_complete_request(blk_mq_rq_from_pdu(job));
}
EXPORT_SYMBOL_GPL(bsg_job_done);
@@ -114,7 +183,6 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
if (!buf->sg_list)
return -ENOMEM;
sg_init_table(buf->sg_list, req->nr_phys_segments);
- scsi_req(req)->resid_len = blk_rq_bytes(req);
buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
buf->payload_len = blk_rq_bytes(req);
return 0;
@@ -125,15 +193,13 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
* @dev: device that is being sent the bsg request
* @req: BSG request that needs a job structure
*/
-static int bsg_prepare_job(struct device *dev, struct request *req)
+static bool bsg_prepare_job(struct device *dev, struct request *req)
{
struct request *rsp = req->next_rq;
- struct scsi_request *rq = scsi_req(req);
struct bsg_job *job = blk_mq_rq_to_pdu(req);
int ret;
- job->request = rq->cmd;
- job->request_len = rq->cmd_len;
+ job->timeout = req->timeout;
if (req->bio) {
ret = bsg_map_buffer(&job->request_payload, req);
@@ -149,12 +215,13 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
/* take a reference for the request */
get_device(job->dev);
kref_init(&job->kref);
- return 0;
+ return true;
failjob_rls_rqst_payload:
kfree(job->request_payload.sg_list);
failjob_rls_job:
- return -ENOMEM;
+ job->result = -ENOMEM;
+ return false;
}
/**
@@ -183,9 +250,7 @@ static void bsg_request_fn(struct request_queue *q)
break;
spin_unlock_irq(q->queue_lock);
- ret = bsg_prepare_job(dev, req);
- if (ret) {
- scsi_req(req)->result = ret;
+ if (!bsg_prepare_job(dev, req)) {
blk_end_request_all(req, BLK_STS_OK);
spin_lock_irq(q->queue_lock);
continue;
@@ -202,47 +267,34 @@ static void bsg_request_fn(struct request_queue *q)
spin_lock_irq(q->queue_lock);
}
+/* called right after the request is allocated for the request_queue */
static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
- struct scsi_request *sreq = &job->sreq;
-
- /* called right after the request is allocated for the request_queue */
- sreq->sense = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp);
- if (!sreq->sense)
+ job->reply = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp);
+ if (!job->reply)
return -ENOMEM;
-
return 0;
}
+/* called right before the request is given to the request_queue user */
static void bsg_initialize_rq(struct request *req)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
- struct scsi_request *sreq = &job->sreq;
- void *sense = sreq->sense;
-
- /* called right before the request is given to the request_queue user */
+ void *reply = job->reply;
memset(job, 0, sizeof(*job));
-
- scsi_req_init(sreq);
-
- sreq->sense = sense;
- sreq->sense_len = SCSI_SENSE_BUFFERSIZE;
-
- job->req = req;
- job->reply = sense;
- job->reply_len = sreq->sense_len;
+ job->reply = reply;
+ job->reply_len = SCSI_SENSE_BUFFERSIZE;
job->dd_data = job + 1;
}
static void bsg_exit_rq(struct request_queue *q, struct request *req)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
- struct scsi_request *sreq = &job->sreq;
- kfree(sreq->sense);
+ kfree(job->reply);
}
/**
@@ -275,12 +327,11 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
q->queuedata = dev;
q->bsg_job_fn = job_fn;
- queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
- queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+ blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
- ret = bsg_register_queue(q, dev, name, release);
+ ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release);
if (ret) {
printk(KERN_ERR "%s: bsg interface failed to "
"initialize - register queue\n", dev->kobj.name);
diff --git a/block/bsg.c b/block/bsg.c
index 06dc96e1f670..defa06c11858 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -130,114 +130,120 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
}
-static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, struct bsg_device *bd,
- fmode_t mode)
+#define uptr64(val) ((void __user *)(uintptr_t)(val))
+
+static int bsg_scsi_check_proto(struct sg_io_v4 *hdr)
+{
+ if (hdr->protocol != BSG_PROTOCOL_SCSI ||
+ hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
+ return -EINVAL;
+ return 0;
+}
+
+static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
+ fmode_t mode)
{
- struct scsi_request *req = scsi_req(rq);
+ struct scsi_request *sreq = scsi_req(rq);
- if (hdr->request_len > BLK_MAX_CDB) {
- req->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
- if (!req->cmd)
+ sreq->cmd_len = hdr->request_len;
+ if (sreq->cmd_len > BLK_MAX_CDB) {
+ sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
+ if (!sreq->cmd)
return -ENOMEM;
}
- if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request,
- hdr->request_len))
+ if (copy_from_user(sreq->cmd, uptr64(hdr->request), sreq->cmd_len))
return -EFAULT;
-
- if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(req->cmd, mode))
- return -EPERM;
- } else if (!capable(CAP_SYS_RAWIO))
+ if (blk_verify_command(sreq->cmd, mode))
return -EPERM;
-
- /*
- * fill in request structure
- */
- req->cmd_len = hdr->request_len;
-
- rq->timeout = msecs_to_jiffies(hdr->timeout);
- if (!rq->timeout)
- rq->timeout = q->sg_timeout;
- if (!rq->timeout)
- rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
- if (rq->timeout < BLK_MIN_SG_TIMEOUT)
- rq->timeout = BLK_MIN_SG_TIMEOUT;
-
return 0;
}
-/*
- * Check if sg_io_v4 from user is allowed and valid
- */
-static int
-bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op)
+static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
{
+ struct scsi_request *sreq = scsi_req(rq);
int ret = 0;
- if (hdr->guard != 'Q')
- return -EINVAL;
+ /*
+ * fill in all the output members
+ */
+ hdr->device_status = sreq->result & 0xff;
+ hdr->transport_status = host_byte(sreq->result);
+ hdr->driver_status = driver_byte(sreq->result);
+ hdr->info = 0;
+ if (hdr->device_status || hdr->transport_status || hdr->driver_status)
+ hdr->info |= SG_INFO_CHECK;
+ hdr->response_len = 0;
- switch (hdr->protocol) {
- case BSG_PROTOCOL_SCSI:
- switch (hdr->subprotocol) {
- case BSG_SUB_PROTOCOL_SCSI_CMD:
- case BSG_SUB_PROTOCOL_SCSI_TRANSPORT:
- break;
- default:
- ret = -EINVAL;
- }
- break;
- default:
- ret = -EINVAL;
+ if (sreq->sense_len && hdr->response) {
+ int len = min_t(unsigned int, hdr->max_response_len,
+ sreq->sense_len);
+
+ if (copy_to_user(uptr64(hdr->response), sreq->sense, len))
+ ret = -EFAULT;
+ else
+ hdr->response_len = len;
+ }
+
+ if (rq->next_rq) {
+ hdr->dout_resid = sreq->resid_len;
+ hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
+ } else if (rq_data_dir(rq) == READ) {
+ hdr->din_resid = sreq->resid_len;
+ } else {
+ hdr->dout_resid = sreq->resid_len;
}
- *op = hdr->dout_xfer_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN;
return ret;
}
-/*
- * map sg_io_v4 to a request.
- */
+static void bsg_scsi_free_rq(struct request *rq)
+{
+ scsi_req_free_cmd(scsi_req(rq));
+}
+
+static const struct bsg_ops bsg_scsi_ops = {
+ .check_proto = bsg_scsi_check_proto,
+ .fill_hdr = bsg_scsi_fill_hdr,
+ .complete_rq = bsg_scsi_complete_rq,
+ .free_rq = bsg_scsi_free_rq,
+};
+
static struct request *
-bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode)
+bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
{
- struct request_queue *q = bd->queue;
struct request *rq, *next_rq = NULL;
int ret;
- unsigned int op, dxfer_len;
- void __user *dxferp = NULL;
- struct bsg_class_device *bcd = &q->bsg_dev;
- /* if the LLD has been removed then the bsg_unregister_queue will
- * eventually be called and the class_dev was freed, so we can no
- * longer use this request_queue. Return no such address.
- */
- if (!bcd->class_dev)
+ if (!q->bsg_dev.class_dev)
return ERR_PTR(-ENXIO);
- bsg_dbg(bd, "map hdr %llx/%u %llx/%u\n",
- (unsigned long long) hdr->dout_xferp,
- hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
- hdr->din_xfer_len);
+ if (hdr->guard != 'Q')
+ return ERR_PTR(-EINVAL);
- ret = bsg_validate_sgv4_hdr(hdr, &op);
+ ret = q->bsg_dev.ops->check_proto(hdr);
if (ret)
return ERR_PTR(ret);
- /*
- * map scatter-gather elements separately and string them to request
- */
- rq = blk_get_request(q, op, GFP_KERNEL);
+ rq = blk_get_request(q, hdr->dout_xfer_len ?
+ REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
+ GFP_KERNEL);
if (IS_ERR(rq))
return rq;
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, mode);
+ ret = q->bsg_dev.ops->fill_hdr(rq, hdr, mode);
if (ret)
goto out;
- if (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len) {
+ rq->timeout = msecs_to_jiffies(hdr->timeout);
+ if (!rq->timeout)
+ rq->timeout = q->sg_timeout;
+ if (!rq->timeout)
+ rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
+ if (rq->timeout < BLK_MIN_SG_TIMEOUT)
+ rq->timeout = BLK_MIN_SG_TIMEOUT;
+
+ if (hdr->dout_xfer_len && hdr->din_xfer_len) {
if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
ret = -EOPNOTSUPP;
goto out;
@@ -246,42 +252,39 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode)
next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
if (IS_ERR(next_rq)) {
ret = PTR_ERR(next_rq);
- next_rq = NULL;
goto out;
}
- rq->next_rq = next_rq;
- dxferp = (void __user *)(unsigned long)hdr->din_xferp;
- ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
+ rq->next_rq = next_rq;
+ ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr->din_xferp),
hdr->din_xfer_len, GFP_KERNEL);
if (ret)
- goto out;
+ goto out_free_nextrq;
}
if (hdr->dout_xfer_len) {
- dxfer_len = hdr->dout_xfer_len;
- dxferp = (void __user *)(unsigned long)hdr->dout_xferp;
+ ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->dout_xferp),
+ hdr->dout_xfer_len, GFP_KERNEL);
} else if (hdr->din_xfer_len) {
- dxfer_len = hdr->din_xfer_len;
- dxferp = (void __user *)(unsigned long)hdr->din_xferp;
- } else
- dxfer_len = 0;
-
- if (dxfer_len) {
- ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
- GFP_KERNEL);
- if (ret)
- goto out;
+ ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp),
+ hdr->din_xfer_len, GFP_KERNEL);
+ } else {
+ ret = blk_rq_map_user(q, rq, NULL, NULL, 0, GFP_KERNEL);
}
+ if (ret)
+ goto out_unmap_nextrq;
return rq;
+
+out_unmap_nextrq:
+ if (rq->next_rq)
+ blk_rq_unmap_user(rq->next_rq->bio);
+out_free_nextrq:
+ if (rq->next_rq)
+ blk_put_request(rq->next_rq);
out:
- scsi_req_free_cmd(scsi_req(rq));
+ q->bsg_dev.ops->free_rq(rq);
blk_put_request(rq);
- if (next_rq) {
- blk_rq_unmap_user(next_rq->bio);
- blk_put_request(next_rq);
- }
return ERR_PTR(ret);
}
@@ -383,56 +386,18 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd)
static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
struct bio *bio, struct bio *bidi_bio)
{
- struct scsi_request *req = scsi_req(rq);
- int ret = 0;
-
- pr_debug("rq %p bio %p 0x%x\n", rq, bio, req->result);
- /*
- * fill in all the output members
- */
- hdr->device_status = req->result & 0xff;
- hdr->transport_status = host_byte(req->result);
- hdr->driver_status = driver_byte(req->result);
- hdr->info = 0;
- if (hdr->device_status || hdr->transport_status || hdr->driver_status)
- hdr->info |= SG_INFO_CHECK;
- hdr->response_len = 0;
-
- if (req->sense_len && hdr->response) {
- int len = min_t(unsigned int, hdr->max_response_len,
- req->sense_len);
+ int ret;
- ret = copy_to_user((void __user *)(unsigned long)hdr->response,
- req->sense, len);
- if (!ret)
- hdr->response_len = len;
- else
- ret = -EFAULT;
- }
+ ret = rq->q->bsg_dev.ops->complete_rq(rq, hdr);
if (rq->next_rq) {
- hdr->dout_resid = req->resid_len;
- hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
blk_rq_unmap_user(bidi_bio);
blk_put_request(rq->next_rq);
- } else if (rq_data_dir(rq) == READ)
- hdr->din_resid = req->resid_len;
- else
- hdr->dout_resid = req->resid_len;
-
- /*
- * If the request generated a negative error number, return it
- * (providing we aren't already returning an error); if it's
- * just a protocol response (i.e. non negative), that gets
- * processed above.
- */
- if (!ret && req->result < 0)
- ret = req->result;
+ }
blk_rq_unmap_user(bio);
- scsi_req_free_cmd(req);
+ rq->q->bsg_dev.ops->free_rq(rq);
blk_put_request(rq);
-
return ret;
}
@@ -614,7 +579,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
/*
* get a request, fill in the blanks, and add to request queue
*/
- rq = bsg_map_hdr(bd, &bc->hdr, mode);
+ rq = bsg_map_hdr(bd->queue, &bc->hdr, mode);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
rq = NULL;
@@ -742,11 +707,6 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
struct bsg_device *bd;
unsigned char buf[32];
- if (!blk_queue_scsi_passthrough(rq)) {
- WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
- return ERR_PTR(-EINVAL);
- }
-
if (!blk_get_queue(rq))
return ERR_PTR(-ENXIO);
@@ -907,7 +867,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (copy_from_user(&hdr, uarg, sizeof(hdr)))
return -EFAULT;
- rq = bsg_map_hdr(bd, &hdr, file->f_mode);
+ rq = bsg_map_hdr(bd->queue, &hdr, file->f_mode);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -959,7 +919,8 @@ void bsg_unregister_queue(struct request_queue *q)
EXPORT_SYMBOL_GPL(bsg_unregister_queue);
int bsg_register_queue(struct request_queue *q, struct device *parent,
- const char *name, void (*release)(struct device *))
+ const char *name, const struct bsg_ops *ops,
+ void (*release)(struct device *))
{
struct bsg_class_device *bcd;
dev_t dev;
@@ -996,6 +957,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
bcd->queue = q;
bcd->parent = get_device(parent);
bcd->release = release;
+ bcd->ops = ops;
kref_init(&bcd->ref);
dev = MKDEV(bsg_major, bcd->minor);
class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
@@ -1023,7 +985,17 @@ unlock:
mutex_unlock(&bsg_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(bsg_register_queue);
+
+int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
+{
+ if (!blk_queue_scsi_passthrough(q)) {
+ WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+ return -EINVAL;
+ }
+
+ return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL);
+}
+EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
static struct cdev bsg_cdev;
diff --git a/block/sed-opal.c b/block/sed-opal.c
index e4929eec547f..945f4b8610e0 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -554,15 +554,14 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
size_t len;
int msb;
- u8 n;
if (!(number & ~TINY_ATOM_DATA_MASK)) {
add_token_u8(err, cmd, number);
return;
}
- msb = fls(number);
- len = DIV_ROUND_UP(msb, 4);
+ msb = fls64(number);
+ len = DIV_ROUND_UP(msb, 8);
if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) {
pr_debug("Error adding u64: end of buffer.\n");
@@ -570,10 +569,8 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
return;
}
add_short_atom_header(cmd, false, false, len);
- while (len--) {
- n = number >> (len * 8);
- add_token_u8(err, cmd, n);
- }
+ while (len--)
+ add_token_u8(err, cmd, number >> (len * 8));
}
static void add_token_bytestring(int *err, struct opal_dev *cmd,
@@ -871,6 +868,9 @@ static int response_parse(const u8 *buf, size_t length,
static size_t response_get_string(const struct parsed_resp *resp, int n,
const char **store)
{
+ u8 skip;
+ const struct opal_resp_tok *token;
+
*store = NULL;
if (!resp) {
pr_debug("Response is NULL\n");
@@ -883,13 +883,30 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
return 0;
}
- if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) {
+ token = &resp->toks[n];
+ if (token->type != OPAL_DTA_TOKENID_BYTESTRING) {
pr_debug("Token is not a byte string!\n");
return 0;
}
- *store = resp->toks[n].pos + 1;
- return resp->toks[n].len - 1;
+ switch (token->width) {
+ case OPAL_WIDTH_TINY:
+ case OPAL_WIDTH_SHORT:
+ skip = 1;
+ break;
+ case OPAL_WIDTH_MEDIUM:
+ skip = 2;
+ break;
+ case OPAL_WIDTH_LONG:
+ skip = 4;
+ break;
+ default:
+ pr_debug("Token has invalid width!\n");
+ return 0;
+ }
+
+ *store = token->pos + skip;
+ return token->len - skip;
}
static u64 response_get_u64(const struct parsed_resp *resp, int n)
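
The add_token_u64() change above fixes two problems: fls() only looks at the low 32 bits of the u64, and dividing by 4 counts nibbles rather than bytes. An illustrative userspace comparison (fls64_sim() and the sample values are stand-ins, not kernel code):

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* 1-based index of the most significant set bit, 0 for 0 (like fls64()). */
static int fls64_sim(uint64_t x)
{
	int n = 0;

	while (x) {
		n++;
		x >>= 1;
	}
	return n;
}

int main(void)
{
	uint64_t samples[] = { 0x1000, 0x100000000ULL };

	for (unsigned int i = 0; i < 2; i++) {
		uint64_t v = samples[i];
		/* old: 32-bit fls(), length computed in nibbles */
		int old_len = DIV_ROUND_UP(fls64_sim(v & 0xffffffffULL), 4);
		/* new: fls64(), length computed in bytes */
		int new_len = DIV_ROUND_UP(fls64_sim(v), 8);

		printf("0x%llx: old len=%d, new len=%d\n",
		       (unsigned long long)v, old_len, new_len);
	}
	return 0;
}

For 0x1000 the old calculation emits 4 bytes where 2 suffice, and for 0x100000000 it emits none at all because the value is truncated to 32 bits; the fixed version yields 2 and 5 bytes respectively.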