aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 13:19:59 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 13:19:59 -0800
commit0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch)
tree2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /include
parentMerge tag 'y2038-for-4.21' of ssh://gitolite.kernel.org:/pub/scm/linux/kernel/git/arnd/playground (diff)
parentkyber: use sbitmap add_wait_queue/list_del wait helpers (diff)
downloadlinux-dev-0e9da3fbf7d81f0f913b491c8de1ba7883d4f217.tar.xz
linux-dev-0e9da3fbf7d81f0f913b491c8de1ba7883d4f217.zip
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "This is the main pull request for block/storage for 4.21. Larger than usual, it was a busy round with lots of goodies queued up. Most notable is the removal of the old IO stack, which has been a long time coming. No new features for a while, everything coming in this week has all been fixes for things that were previously merged. This contains: - Use atomic counters instead of semaphores for mtip32xx (Arnd) - Cleanup of the mtip32xx request setup (Christoph) - Fix for circular locking dependency in loop (Jan, Tetsuo) - bcache (Coly, Guoju, Shenghui) * Optimizations for writeback caching * Various fixes and improvements - nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith) * host and target support for NVMe over TCP * Error log page support * Support for separate read/write/poll queues * Much improved polling * discard OOM fallback * Tracepoint improvements - lightnvm (Hans, Hua, Igor, Matias, Javier) * Igor added packed metadata to pblk. Now drives without metadata per LBA can be used as well. * Fix from Geert on uninitialized value on chunk metadata reads. * Fixes from Hans and Javier to pblk recovery and write path. * Fix from Hua Su to fix a race condition in the pblk recovery code. * Scan optimization added to pblk recovery from Zhoujie. * Small geometry cleanup from me. - Conversion of the last few drivers that used the legacy path to blk-mq (me) - Removal of legacy IO path in SCSI (me, Christoph) - Removal of legacy IO stack and schedulers (me) - Support for much better polling, now without interrupts at all. blk-mq adds support for multiple queue maps, which enables us to have a map per type. This in turn enables nvme to have separate completion queues for polling, which can then be interrupt-less. Also means we're ready for async polled IO, which is hopefully coming in the next release. 
- Killing of (now) unused block exports (Christoph) - Unification of the blk-rq-qos and blk-wbt wait handling (Josef) - Support for zoned testing with null_blk (Masato) - sx8 conversion to per-host tag sets (Christoph) - IO priority improvements (Damien) - mq-deadline zoned fix (Damien) - Ref count blkcg series (Dennis) - Lots of blk-mq improvements and speedups (me) - sbitmap scalability improvements (me) - Make core inflight IO accounting per-cpu (Mikulas) - Export timeout setting in sysfs (Weiping) - Cleanup the direct issue path (Jianchao) - Export blk-wbt internals in block debugfs for easier debugging (Ming) - Lots of other fixes and improvements" * tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits) kyber: use sbitmap add_wait_queue/list_del wait helpers sbitmap: add helpers for add/del wait queue handling block: save irq state in blkg_lookup_create() dm: don't reuse bio for flushes nvme-pci: trace SQ status on completions nvme-rdma: implement polling queue map nvme-fabrics: allow user to pass in nr_poll_queues nvme-fabrics: allow nvmf_connect_io_queue to poll nvme-core: optionally poll sync commands block: make request_to_qc_t public nvme-tcp: fix spelling mistake "attepmpt" -> "attempt" nvme-tcp: fix endianess annotations nvmet-tcp: fix endianess annotations nvme-pci: refactor nvme_poll_irqdisable to make sparse happy nvme-pci: only set nr_maps to 2 if poll queues are supported nvmet: use a macro for default error location nvmet: fix comparison of a u16 with -1 blk-mq: enable IO poll if .nr_queues of type poll > 0 blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight() blk-mq: skip zero-queue maps in blk_mq_map_swqueue ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h29
-rw-r--r--include/linux/blk-cgroup.h227
-rw-r--r--include/linux/blk-mq-pci.h4
-rw-r--r--include/linux/blk-mq-rdma.h2
-rw-r--r--include/linux/blk-mq-virtio.h4
-rw-r--r--include/linux/blk-mq.h83
-rw-r--r--include/linux/blk_types.h24
-rw-r--r--include/linux/blkdev.h250
-rw-r--r--include/linux/bsg-lib.h6
-rw-r--r--include/linux/cgroup.h2
-rw-r--r--include/linux/elevator.h94
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/genhd.h57
-rw-r--r--include/linux/ide.h14
-rw-r--r--include/linux/init.h1
-rw-r--r--include/linux/ioprio.h13
-rw-r--r--include/linux/lightnvm.h3
-rw-r--r--include/linux/nvme-fc-driver.h17
-rw-r--r--include/linux/nvme-tcp.h189
-rw-r--r--include/linux/nvme.h73
-rw-r--r--include/linux/sbitmap.h89
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/uio.h5
-rw-r--r--include/linux/writeback.h5
-rw-r--r--include/scsi/scsi_cmnd.h6
-rw-r--r--include/scsi/scsi_dh.h2
-rw-r--r--include/scsi/scsi_driver.h3
-rw-r--r--include/scsi/scsi_host.h18
-rw-r--r--include/scsi/scsi_tcq.h14
-rw-r--r--include/trace/events/bcache.h27
-rw-r--r--include/uapi/linux/aio_abi.h2
31 files changed, 664 insertions, 604 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 056fb627edb3..7380b094dcca 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -491,35 +491,40 @@ do { \
bio_clear_flag(bio, BIO_THROTTLED);\
(bio)->bi_disk = (bdev)->bd_disk; \
(bio)->bi_partno = (bdev)->bd_partno; \
+ bio_associate_blkg(bio); \
} while (0)
#define bio_copy_dev(dst, src) \
do { \
(dst)->bi_disk = (src)->bi_disk; \
(dst)->bi_partno = (src)->bi_partno; \
+ bio_clone_blkg_association(dst, src); \
} while (0)
#define bio_dev(bio) \
disk_devt((bio)->bi_disk)
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
+void bio_associate_blkg_from_page(struct bio *bio, struct page *page);
#else
-static inline int bio_associate_blkcg_from_page(struct bio *bio,
- struct page *page) { return 0; }
+static inline void bio_associate_blkg_from_page(struct bio *bio,
+ struct page *page) { }
#endif
#ifdef CONFIG_BLK_CGROUP
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
-void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
+void bio_disassociate_blkg(struct bio *bio);
+void bio_associate_blkg(struct bio *bio);
+void bio_associate_blkg_from_css(struct bio *bio,
+ struct cgroup_subsys_state *css);
+void bio_clone_blkg_association(struct bio *dst, struct bio *src);
#else /* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkcg(struct bio *bio,
- struct cgroup_subsys_state *blkcg_css) { return 0; }
-static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkcg_association(struct bio *dst,
- struct bio *src) { }
+static inline void bio_disassociate_blkg(struct bio *bio) { }
+static inline void bio_associate_blkg(struct bio *bio) { }
+static inline void bio_associate_blkg_from_css(struct bio *bio,
+ struct cgroup_subsys_state *css)
+{ }
+static inline void bio_clone_blkg_association(struct bio *dst,
+ struct bio *src) { }
#endif /* CONFIG_BLK_CGROUP */
#ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6d766a19f2bb..f025fd1e22e6 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -21,6 +21,7 @@
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
+#include <linux/fs.h>
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
@@ -122,11 +123,8 @@ struct blkcg_gq {
/* all non-root blkcg_gq's are guaranteed to have access to parent */
struct blkcg_gq *parent;
- /* request allocation list for this blkcg-q pair */
- struct request_list rl;
-
/* reference count */
- atomic_t refcnt;
+ struct percpu_ref refcnt;
/* is this blkg online? protected by both blkcg and q locks */
bool online;
@@ -184,6 +182,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
struct request_queue *q, bool update_hint);
+struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
+ struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
@@ -230,22 +230,62 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static inline struct cgroup_subsys_state *blkcg_css(void)
+{
+ struct cgroup_subsys_state *css;
+
+ css = kthread_blkcg();
+ if (css)
+ return css;
+ return task_css(current, io_cgrp_id);
+}
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct blkcg, css) : NULL;
}
-static inline struct blkcg *bio_blkcg(struct bio *bio)
+/**
+ * __bio_blkcg - internal, inconsistent version to get blkcg
+ *
+ * DO NOT USE.
+ * This function is inconsistent and consequently is dangerous to use. The
+ * first part of the function returns a blkcg where a reference is owned by the
+ * bio. This means it does not need to be rcu protected as it cannot go away
+ * with the bio owning a reference to it. However, the latter potentially gets
+ * it from task_css(). This can race against task migration and the cgroup
+ * dying. It is also semantically different as it must be called rcu protected
+ * and is susceptible to failure when trying to get a reference to it.
+ * Therefore, it is not ok to assume that *_get() will always succeed on the
+ * blkcg returned here.
+ */
+static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
- struct cgroup_subsys_state *css;
+ if (bio && bio->bi_blkg)
+ return bio->bi_blkg->blkcg;
+ return css_to_blkcg(blkcg_css());
+}
- if (bio && bio->bi_css)
- return css_to_blkcg(bio->bi_css);
- css = kthread_blkcg();
- if (css)
- return css_to_blkcg(css);
- return css_to_blkcg(task_css(current, io_cgrp_id));
+/**
+ * bio_blkcg - grab the blkcg associated with a bio
+ * @bio: target bio
+ *
+ * This returns the blkcg associated with a bio, %NULL if not associated.
+ * Callers are expected to either handle %NULL or know association has been
+ * done prior to calling this.
+ */
+static inline struct blkcg *bio_blkcg(struct bio *bio)
+{
+ if (bio && bio->bi_blkg)
+ return bio->bi_blkg->blkcg;
+ return NULL;
}
static inline bool blk_cgroup_congested(void)
@@ -328,16 +368,12 @@ static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
* @q: request_queue of interest
*
* Lookup blkg for the @blkcg - @q pair. This function should be called
- * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
- * - see blk_queue_bypass_start() for details.
+ * under RCU read lock.
*/
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
struct request_queue *q)
{
WARN_ON_ONCE(!rcu_read_lock_held());
-
- if (unlikely(blk_queue_bypass(q)))
- return NULL;
return __blkg_lookup(blkcg, q, false);
}
@@ -451,26 +487,35 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
*/
static inline void blkg_get(struct blkcg_gq *blkg)
{
- WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
- atomic_inc(&blkg->refcnt);
+ percpu_ref_get(&blkg->refcnt);
}
/**
- * blkg_try_get - try and get a blkg reference
+ * blkg_tryget - try and get a blkg reference
* @blkg: blkg to get
*
* This is for use when doing an RCU lookup of the blkg. We may be in the midst
* of freeing this blkg, so we can only use it if the refcnt is not zero.
*/
-static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
- if (atomic_inc_not_zero(&blkg->refcnt))
- return blkg;
- return NULL;
+ return percpu_ref_tryget(&blkg->refcnt);
}
+/**
+ * blkg_tryget_closest - try and get a blkg ref on the closest blkg
+ * @blkg: blkg to get
+ *
+ * This walks up the blkg tree to find the closest non-dying blkg and returns
+ * the blkg that it associated with, as it may not be the passed-in blkg.
+ */
+static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
+{
+ while (blkg && !percpu_ref_tryget(&blkg->refcnt))
+ blkg = blkg->parent;
-void __blkg_release_rcu(struct rcu_head *rcu);
+ return blkg;
+}
/**
* blkg_put - put a blkg reference
@@ -478,9 +523,7 @@ void __blkg_release_rcu(struct rcu_head *rcu);
*/
static inline void blkg_put(struct blkcg_gq *blkg)
{
- WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
- if (atomic_dec_and_test(&blkg->refcnt))
- call_rcu(&blkg->rcu_head, __blkg_release_rcu);
+ percpu_ref_put(&blkg->refcnt);
}
/**
@@ -515,94 +558,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))
-/**
- * blk_get_rl - get request_list to use
- * @q: request_queue of interest
- * @bio: bio which will be attached to the allocated request (may be %NULL)
- *
- * The caller wants to allocate a request from @q to use for @bio. Find
- * the request_list to use and obtain a reference on it. Should be called
- * under queue_lock. This function is guaranteed to return non-%NULL
- * request_list.
- */
-static inline struct request_list *blk_get_rl(struct request_queue *q,
- struct bio *bio)
-{
- struct blkcg *blkcg;
- struct blkcg_gq *blkg;
-
- rcu_read_lock();
-
- blkcg = bio_blkcg(bio);
-
- /* bypass blkg lookup and use @q->root_rl directly for root */
- if (blkcg == &blkcg_root)
- goto root_rl;
-
- /*
- * Try to use blkg->rl. blkg lookup may fail under memory pressure
- * or if either the blkcg or queue is going away. Fall back to
- * root_rl in such cases.
- */
- blkg = blkg_lookup(blkcg, q);
- if (unlikely(!blkg))
- goto root_rl;
-
- blkg_get(blkg);
- rcu_read_unlock();
- return &blkg->rl;
-root_rl:
- rcu_read_unlock();
- return &q->root_rl;
-}
-
-/**
- * blk_put_rl - put request_list
- * @rl: request_list to put
- *
- * Put the reference acquired by blk_get_rl(). Should be called under
- * queue_lock.
- */
-static inline void blk_put_rl(struct request_list *rl)
-{
- if (rl->blkg->blkcg != &blkcg_root)
- blkg_put(rl->blkg);
-}
-
-/**
- * blk_rq_set_rl - associate a request with a request_list
- * @rq: request of interest
- * @rl: target request_list
- *
- * Associate @rq with @rl so that accounting and freeing can know the
- * request_list @rq came from.
- */
-static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
-{
- rq->rl = rl;
-}
-
-/**
- * blk_rq_rl - return the request_list a request came from
- * @rq: request of interest
- *
- * Return the request_list @rq is allocated from.
- */
-static inline struct request_list *blk_rq_rl(struct request *rq)
-{
- return rq->rl;
-}
-
-struct request_list *__blk_queue_next_rl(struct request_list *rl,
- struct request_queue *q);
-/**
- * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
- *
- * Should be used under queue_lock.
- */
-#define blk_queue_for_each_rl(rl, q) \
- for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
-
static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
int ret;
@@ -797,32 +752,34 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
struct bio *bio) { return false; }
#endif
+
+static inline void blkcg_bio_issue_init(struct bio *bio)
+{
+ bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+}
+
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio)
{
- struct blkcg *blkcg;
struct blkcg_gq *blkg;
bool throtl = false;
rcu_read_lock();
- blkcg = bio_blkcg(bio);
-
- /* associate blkcg if bio hasn't attached one */
- bio_associate_blkcg(bio, &blkcg->css);
-
- blkg = blkg_lookup(blkcg, q);
- if (unlikely(!blkg)) {
- spin_lock_irq(q->queue_lock);
- blkg = blkg_lookup_create(blkcg, q);
- if (IS_ERR(blkg))
- blkg = NULL;
- spin_unlock_irq(q->queue_lock);
+
+ if (!bio->bi_blkg) {
+ char b[BDEVNAME_SIZE];
+
+ WARN_ONCE(1,
+ "no blkg associated for bio on block-device: %s\n",
+ bio_devname(bio, b));
+ bio_associate_blkg(bio);
}
+ blkg = bio->bi_blkg;
+
throtl = blk_throtl_bio(q, blkg, bio);
if (!throtl) {
- blkg = blkg ?: q->root_blkg;
/*
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
* is a split bio and we would have already accounted for the
@@ -834,6 +791,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
}
+ blkcg_bio_issue_init(bio);
+
rcu_read_unlock();
return !throtl;
}
@@ -930,6 +889,7 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
+static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -939,12 +899,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
-static inline struct request_list *blk_get_rl(struct request_queue *q,
- struct bio *bio) { return &q->root_rl; }
-static inline void blk_put_rl(struct request_list *rl) { }
-static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
-static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
-
+static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio) { return true; }
diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h
index 9f4c17f0d2d8..0b1f45c62623 100644
--- a/include/linux/blk-mq-pci.h
+++ b/include/linux/blk-mq-pci.h
@@ -2,10 +2,10 @@
#ifndef _LINUX_BLK_MQ_PCI_H
#define _LINUX_BLK_MQ_PCI_H
-struct blk_mq_tag_set;
+struct blk_mq_queue_map;
struct pci_dev;
-int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
+int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev,
int offset);
#endif /* _LINUX_BLK_MQ_PCI_H */
diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h
index b4ade198007d..7b6ecf9ac4c3 100644
--- a/include/linux/blk-mq-rdma.h
+++ b/include/linux/blk-mq-rdma.h
@@ -4,7 +4,7 @@
struct blk_mq_tag_set;
struct ib_device;
-int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set,
+int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map,
struct ib_device *dev, int first_vec);
#endif /* _LINUX_BLK_MQ_RDMA_H */
diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h
index 69b4da262c45..687ae287e1dc 100644
--- a/include/linux/blk-mq-virtio.h
+++ b/include/linux/blk-mq-virtio.h
@@ -2,10 +2,10 @@
#ifndef _LINUX_BLK_MQ_VIRTIO_H
#define _LINUX_BLK_MQ_VIRTIO_H
-struct blk_mq_tag_set;
+struct blk_mq_queue_map;
struct virtio_device;
-int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set,
+int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap,
struct virtio_device *vdev, int first_vec);
#endif /* _LINUX_BLK_MQ_VIRTIO_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2286dc12c6bc..0e030f5f76b6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -37,7 +37,8 @@ struct blk_mq_hw_ctx {
struct blk_mq_ctx *dispatch_from;
unsigned int dispatch_busy;
- unsigned int nr_ctx;
+ unsigned short type;
+ unsigned short nr_ctx;
struct blk_mq_ctx **ctxs;
spinlock_t dispatch_wait_lock;
@@ -74,10 +75,31 @@ struct blk_mq_hw_ctx {
struct srcu_struct srcu[0];
};
+struct blk_mq_queue_map {
+ unsigned int *mq_map;
+ unsigned int nr_queues;
+ unsigned int queue_offset;
+};
+
+enum hctx_type {
+ HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
+ HCTX_TYPE_READ, /* just for READ I/O */
+ HCTX_TYPE_POLL, /* polled I/O of any kind */
+
+ HCTX_MAX_TYPES,
+};
+
struct blk_mq_tag_set {
- unsigned int *mq_map;
+ /*
+ * map[] holds ctx -> hctx mappings, one map exists for each type
+ * that the driver wishes to support. There are no restrictions
+ * on maps being of the same size, and it's perfectly legal to
+ * share maps between types.
+ */
+ struct blk_mq_queue_map map[HCTX_MAX_TYPES];
+ unsigned int nr_maps; /* nr entries in map[] */
const struct blk_mq_ops *ops;
- unsigned int nr_hw_queues;
+ unsigned int nr_hw_queues; /* nr hw queues across maps */
unsigned int queue_depth; /* max hw supported */
unsigned int reserved_tags;
unsigned int cmd_size; /* per-request extra data */
@@ -99,6 +121,7 @@ struct blk_mq_queue_data {
typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
const struct blk_mq_queue_data *);
+typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *);
typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
@@ -109,11 +132,13 @@ typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *,
typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *,
unsigned int);
-typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
+typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
bool);
-typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
-typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
+typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
+typedef int (poll_fn)(struct blk_mq_hw_ctx *);
typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
+typedef bool (busy_fn)(struct request_queue *);
+typedef void (complete_fn)(struct request *);
struct blk_mq_ops {
@@ -123,6 +148,15 @@ struct blk_mq_ops {
queue_rq_fn *queue_rq;
/*
+ * If a driver uses bd->last to judge when to submit requests to
+ * hardware, it must define this function. In case of errors that
+ * make us stop issuing further requests, this hook serves the
+ * purpose of kicking the hardware (which the last request otherwise
+ * would have done).
+ */
+ commit_rqs_fn *commit_rqs;
+
+ /*
* Reserve budget before queue request, once .queue_rq is
* run, it is driver's responsibility to release the
* reserved budget. Also we have to handle failure case
@@ -141,7 +175,7 @@ struct blk_mq_ops {
*/
poll_fn *poll;
- softirq_done_fn *complete;
+ complete_fn *complete;
/*
* Called when the block layer side of a hardware queue has been
@@ -165,6 +199,11 @@ struct blk_mq_ops {
/* Called from inside blk_get_request() */
void (*initialize_rq_fn)(struct request *rq);
+ /*
+ * If set, returns whether or not this queue currently is busy
+ */
+ busy_fn *busy;
+
map_queues_fn *map_queues;
#ifdef CONFIG_BLK_DEBUG_FS
@@ -218,6 +257,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
void blk_mq_free_request(struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
+bool blk_mq_queue_inflight(struct request_queue *q);
+
enum {
/* return when out of requests */
BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0),
@@ -264,7 +305,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-void blk_mq_complete_request(struct request *rq);
+bool blk_mq_complete_request(struct request *rq);
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio);
bool blk_mq_queue_stopped(struct request_queue *q);
@@ -288,24 +329,12 @@ void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
-int blk_mq_map_queues(struct blk_mq_tag_set *set);
+int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
void blk_mq_quiesce_queue_nowait(struct request_queue *q);
-/**
- * blk_mq_mark_complete() - Set request state to complete
- * @rq: request to set to complete state
- *
- * Returns true if request state was successfully set to complete. If
- * successful, the caller is responsibile for seeing this request is ended, as
- * blk_mq_complete_request will not work again.
- */
-static inline bool blk_mq_mark_complete(struct request *rq)
-{
- return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) ==
- MQ_RQ_IN_FLIGHT;
-}
+unsigned int blk_mq_rq_cpu(struct request *rq);
/*
* Driver command data is immediately after the request. So subtract request
@@ -328,4 +357,14 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
for ((i) = 0; (i) < (hctx)->nr_ctx && \
({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
+static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
+ struct request *rq)
+{
+ if (rq->tag != -1)
+ return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT);
+
+ return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) |
+ BLK_QC_T_INTERNAL;
+}
+
#endif
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1dcf652ba0aa..5c7e7f859a24 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -174,11 +174,11 @@ struct bio {
void *bi_private;
#ifdef CONFIG_BLK_CGROUP
/*
- * Optional ioc and css associated with this bio. Put on bio
- * release. Read comment on top of bio_associate_current().
+ * Represents the association of the css and request_queue for the bio.
+ * If a bio goes direct to device, it will not have a blkg as it will
+ * not have a request_queue associated with it. The reference is put
+ * on release of the bio.
*/
- struct io_context *bi_ioc;
- struct cgroup_subsys_state *bi_css;
struct blkcg_gq *bi_blkg;
struct bio_issue bi_issue;
#endif
@@ -228,6 +228,7 @@ struct bio {
#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion
* of this bio. */
#define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */
+#define BIO_TRACKED 12 /* set if bio goes through the rq_qos path */
/* See BVEC_POOL_OFFSET below before adding new flags */
@@ -323,6 +324,8 @@ enum req_flag_bits {
/* command specific flags for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
+ __REQ_HIPRI,
+
/* for driver use */
__REQ_DRV,
__REQ_SWAP, /* swapping request. */
@@ -343,8 +346,8 @@ enum req_flag_bits {
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
-
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
+#define REQ_HIPRI (1ULL << __REQ_HIPRI)
#define REQ_DRV (1ULL << __REQ_DRV)
#define REQ_SWAP (1ULL << __REQ_SWAP)
@@ -422,17 +425,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie)
return cookie != BLK_QC_T_NONE;
}
-static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num,
- bool internal)
-{
- blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT);
-
- if (internal)
- ret |= BLK_QC_T_INTERNAL;
-
- return ret;
-}
-
static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{
return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4293dc1cd160..45552e6eae1e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -58,25 +58,6 @@ struct blk_stat_callback;
typedef void (rq_end_io_fn)(struct request *, blk_status_t);
-#define BLK_RL_SYNCFULL (1U << 0)
-#define BLK_RL_ASYNCFULL (1U << 1)
-
-struct request_list {
- struct request_queue *q; /* the queue this rl belongs to */
-#ifdef CONFIG_BLK_CGROUP
- struct blkcg_gq *blkg; /* blkg this request pool belongs to */
-#endif
- /*
- * count[], starved[], and wait[] are indexed by
- * BLK_RW_SYNC/BLK_RW_ASYNC
- */
- int count[2];
- int starved[2];
- mempool_t *rq_pool;
- wait_queue_head_t wait[2];
- unsigned int flags;
-};
-
/*
* request flags */
typedef __u32 __bitwise req_flags_t;
@@ -85,8 +66,6 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_SORTED ((__force req_flags_t)(1 << 0))
/* drive already may have started this one */
#define RQF_STARTED ((__force req_flags_t)(1 << 1))
-/* uses tagged queueing */
-#define RQF_QUEUED ((__force req_flags_t)(1 << 2))
/* may not be passed by ioscheduler */
#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
/* request for flush sequence */
@@ -150,8 +129,8 @@ enum mq_rq_state {
struct request {
struct request_queue *q;
struct blk_mq_ctx *mq_ctx;
+ struct blk_mq_hw_ctx *mq_hctx;
- int cpu;
unsigned int cmd_flags; /* op and common flags */
req_flags_t rq_flags;
@@ -245,11 +224,7 @@ struct request {
refcount_t ref;
unsigned int timeout;
-
- /* access through blk_rq_set_deadline, blk_rq_deadline */
- unsigned long __deadline;
-
- struct list_head timeout_list;
+ unsigned long deadline;
union {
struct __call_single_data csd;
@@ -264,10 +239,6 @@ struct request {
/* for bidi */
struct request *next_rq;
-
-#ifdef CONFIG_BLK_CGROUP
- struct request_list *rl; /* rl this rq is alloced from */
-#endif
};
static inline bool blk_op_is_scsi(unsigned int op)
@@ -311,41 +282,21 @@ static inline unsigned short req_get_ioprio(struct request *req)
struct blk_queue_ctx;
-typedef void (request_fn_proc) (struct request_queue *q);
typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
-typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t);
-typedef int (prep_rq_fn) (struct request_queue *, struct request *);
-typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
struct bio_vec;
-typedef void (softirq_done_fn)(struct request *);
typedef int (dma_drain_needed_fn)(struct request *);
-typedef int (lld_busy_fn) (struct request_queue *q);
-typedef int (bsg_job_fn) (struct bsg_job *);
-typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t);
-typedef void (exit_rq_fn)(struct request_queue *, struct request *);
enum blk_eh_timer_return {
BLK_EH_DONE, /* drivers has completed the command */
BLK_EH_RESET_TIMER, /* reset timer and try again */
};
-typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
-
enum blk_queue_state {
Queue_down,
Queue_up,
};
-struct blk_queue_tag {
- struct request **tag_index; /* map of busy tags */
- unsigned long *tag_map; /* bit map of free/busy tags */
- int max_depth; /* what we will send to device */
- int real_max_depth; /* what the array can hold */
- atomic_t refcnt; /* map can be shared */
- int alloc_policy; /* tag allocation policy */
- int next_tag; /* next tag */
-};
#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
@@ -444,40 +395,15 @@ struct request_queue {
struct list_head queue_head;
struct request *last_merge;
struct elevator_queue *elevator;
- int nr_rqs[2]; /* # allocated [a]sync rqs */
- int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
struct blk_queue_stats *stats;
struct rq_qos *rq_qos;
- /*
- * If blkcg is not used, @q->root_rl serves all requests. If blkcg
- * is used, root blkg allocates from @q->root_rl and all other
- * blkgs from their own blkg->rl. Which one to use should be
- * determined using bio_request_list().
- */
- struct request_list root_rl;
-
- request_fn_proc *request_fn;
make_request_fn *make_request_fn;
- poll_q_fn *poll_fn;
- prep_rq_fn *prep_rq_fn;
- unprep_rq_fn *unprep_rq_fn;
- softirq_done_fn *softirq_done_fn;
- rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
- lld_busy_fn *lld_busy_fn;
- /* Called just after a request is allocated */
- init_rq_fn *init_rq_fn;
- /* Called just before a request is freed */
- exit_rq_fn *exit_rq_fn;
- /* Called from inside blk_get_request() */
- void (*initialize_rq_fn)(struct request *rq);
const struct blk_mq_ops *mq_ops;
- unsigned int *mq_map;
-
/* sw queues */
struct blk_mq_ctx __percpu *queue_ctx;
unsigned int nr_queues;
@@ -488,17 +414,6 @@ struct request_queue {
struct blk_mq_hw_ctx **queue_hw_ctx;
unsigned int nr_hw_queues;
- /*
- * Dispatch queue sorting
- */
- sector_t end_sector;
- struct request *boundary_rq;
-
- /*
- * Delayed queue handling
- */
- struct delayed_work delay_work;
-
struct backing_dev_info *backing_dev_info;
/*
@@ -529,13 +444,7 @@ struct request_queue {
*/
gfp_t bounce_gfp;
- /*
- * protects queue structures from reentrancy. ->__queue_lock should
- * _never_ be used directly, it is queue private. always use
- * ->queue_lock.
- */
- spinlock_t __queue_lock;
- spinlock_t *queue_lock;
+ spinlock_t queue_lock;
/*
* queue kobject
@@ -545,7 +454,7 @@ struct request_queue {
/*
* mq queue kobject
*/
- struct kobject mq_kobj;
+ struct kobject *mq_kobj;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity integrity;
@@ -561,27 +470,12 @@ struct request_queue {
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
- unsigned int nr_congestion_on;
- unsigned int nr_congestion_off;
- unsigned int nr_batching;
unsigned int dma_drain_size;
void *dma_drain_buffer;
unsigned int dma_pad_mask;
unsigned int dma_alignment;
- struct blk_queue_tag *queue_tags;
-
- unsigned int nr_sorted;
- unsigned int in_flight[2];
-
- /*
- * Number of active block driver functions for which blk_drain_queue()
- * must wait. Must be incremented around functions that unlock the
- * queue_lock internally, e.g. scsi_request_fn().
- */
- unsigned int request_fn_active;
-
unsigned int rq_timeout;
int poll_nsec;
@@ -590,7 +484,6 @@ struct request_queue {
struct timer_list timeout;
struct work_struct timeout_work;
- struct list_head timeout_list;
struct list_head icq_list;
#ifdef CONFIG_BLK_CGROUP
@@ -645,11 +538,9 @@ struct request_queue {
struct mutex sysfs_lock;
- int bypass_depth;
atomic_t mq_freeze_depth;
#if defined(CONFIG_BLK_DEV_BSG)
- bsg_job_fn *bsg_job_fn;
struct bsg_class_device bsg_dev;
#endif
@@ -669,12 +560,12 @@ struct request_queue {
#ifdef CONFIG_BLK_DEBUG_FS
struct dentry *debugfs_dir;
struct dentry *sched_debugfs_dir;
+ struct dentry *rqos_debugfs_dir;
#endif
bool mq_sysfs_init_done;
size_t cmd_size;
- void *rq_alloc_data;
struct work_struct release_work;
@@ -682,10 +573,8 @@ struct request_queue {
u64 write_hints[BLK_MAX_WRITE_HINTS];
};
-#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED 1 /* queue is stopped */
#define QUEUE_FLAG_DYING 2 /* queue being torn down */
-#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */
#define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */
@@ -718,19 +607,15 @@ struct request_queue {
(1 << QUEUE_FLAG_ADD_RANDOM))
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
- (1 << QUEUE_FLAG_SAME_COMP) | \
- (1 << QUEUE_FLAG_POLL))
+ (1 << QUEUE_FLAG_SAME_COMP))
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
-bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
-#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
-#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
@@ -757,32 +642,20 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
-static inline int queue_in_flight(struct request_queue *q)
-{
- return q->in_flight[0] + q->in_flight[1];
-}
-
static inline bool blk_account_rq(struct request *rq)
{
return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
}
-#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
-/* rq->queuelist of dequeued request must be list_empty() */
-#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
-/*
- * Driver can handle struct request, if it either has an old style
- * request_fn defined, or is blk-mq based.
- */
-static inline bool queue_is_rq_based(struct request_queue *q)
+static inline bool queue_is_mq(struct request_queue *q)
{
- return q->request_fn || q->mq_ops;
+ return q->mq_ops;
}
static inline unsigned int blk_queue_cluster(struct request_queue *q)
@@ -845,27 +718,6 @@ static inline bool rq_is_sync(struct request *rq)
return op_is_sync(rq->cmd_flags);
}
-static inline bool blk_rl_full(struct request_list *rl, bool sync)
-{
- unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
-
- return rl->flags & flag;
-}
-
-static inline void blk_set_rl_full(struct request_list *rl, bool sync)
-{
- unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
-
- rl->flags |= flag;
-}
-
-static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
-{
- unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL;
-
- rl->flags &= ~flag;
-}
-
static inline bool rq_mergeable(struct request *rq)
{
if (blk_rq_is_passthrough(rq))
@@ -902,16 +754,6 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
return q->nr_requests;
}
-/*
- * q->prep_rq_fn return values
- */
-enum {
- BLKPREP_OK, /* serve it */
- BLKPREP_KILL, /* fatal error, kill, return -EIO */
- BLKPREP_DEFER, /* leave on queue */
- BLKPREP_INVALID, /* invalid command, kill, return -EREMOTEIO */
-};
-
extern unsigned long blk_max_low_pfn, blk_max_pfn;
/*
@@ -983,10 +825,8 @@ extern blk_qc_t direct_make_request(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
extern void blk_put_request(struct request *);
-extern void __blk_put_request(struct request_queue *, struct request *);
extern struct request *blk_get_request(struct request_queue *, unsigned int op,
blk_mq_req_flags_t flags);
-extern void blk_requeue_request(struct request_queue *, struct request *);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio_set *bs, gfp_t gfp_mask,
@@ -996,7 +836,6 @@ extern void blk_rq_unprep_clone(struct request *rq);
extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
-extern void blk_delay_queue(struct request_queue *, unsigned long);
extern void blk_queue_split(struct request_queue *, struct bio **);
extern void blk_recount_segments(struct request_queue *, struct bio *);
extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
@@ -1009,15 +848,7 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
extern void blk_queue_exit(struct request_queue *q);
-extern void blk_start_queue(struct request_queue *q);
-extern void blk_start_queue_async(struct request_queue *q);
-extern void blk_stop_queue(struct request_queue *q);
extern void blk_sync_queue(struct request_queue *q);
-extern void __blk_stop_queue(struct request_queue *q);
-extern void __blk_run_queue(struct request_queue *q);
-extern void __blk_run_queue_uncond(struct request_queue *q);
-extern void blk_run_queue(struct request_queue *);
-extern void blk_run_queue_async(struct request_queue *q);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
gfp_t);
@@ -1034,7 +865,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
int blk_status_to_errno(blk_status_t status);
blk_status_t errno_to_blk_status(int errno);
-bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
@@ -1172,13 +1003,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq)
return nr_bios;
}
-/*
- * Request issue related functions.
- */
-extern struct request *blk_peek_request(struct request_queue *q);
-extern void blk_start_request(struct request *rq);
-extern struct request *blk_fetch_request(struct request_queue *q);
-
void blk_steal_bios(struct bio_list *list, struct request *rq);
/*
@@ -1196,27 +1020,18 @@ void blk_steal_bios(struct bio_list *list, struct request *rq);
*/
extern bool blk_update_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void blk_finish_request(struct request *rq, blk_status_t error);
-extern bool blk_end_request(struct request *rq, blk_status_t error,
- unsigned int nr_bytes);
extern void blk_end_request_all(struct request *rq, blk_status_t error);
extern bool __blk_end_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
extern void __blk_end_request_all(struct request *rq, blk_status_t error);
extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
-extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
extern void blk_abort_request(struct request *);
-extern void blk_unprep_request(struct request *);
/*
* Access functions for manipulating queue properties
*/
-extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
- spinlock_t *lock, int node_id);
-extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
-extern int blk_init_allocated_queue(struct request_queue *);
extern void blk_cleanup_queue(struct request_queue *);
extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
@@ -1255,15 +1070,10 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern int blk_queue_dma_drain(struct request_queue *q,
dma_drain_needed_fn *dma_drain_needed,
void *buf, unsigned int size);
-extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
-extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
-extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
-extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
-extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
@@ -1299,8 +1109,7 @@ extern long nr_blockdev_pages(void);
bool __must_check blk_get_queue(struct request_queue *);
struct request_queue *blk_alloc_queue(gfp_t);
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
- spinlock_t *lock);
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
@@ -1317,9 +1126,10 @@ extern void blk_set_queue_dying(struct request_queue *);
* schedule() where blk_schedule_flush_plug() is called.
*/
struct blk_plug {
- struct list_head list; /* requests */
struct list_head mq_list; /* blk-mq requests */
struct list_head cb_list; /* md requires an unplug callback */
+ unsigned short rq_count;
+ bool multiple_queues;
};
#define BLK_MAX_REQUEST_COUNT 16
#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
@@ -1358,31 +1168,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
struct blk_plug *plug = tsk->plug;
return plug &&
- (!list_empty(&plug->list) ||
- !list_empty(&plug->mq_list) ||
+ (!list_empty(&plug->mq_list) ||
!list_empty(&plug->cb_list));
}
-/*
- * tag stuff
- */
-extern int blk_queue_start_tag(struct request_queue *, struct request *);
-extern struct request *blk_queue_find_tag(struct request_queue *, int);
-extern void blk_queue_end_tag(struct request_queue *, struct request *);
-extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
-extern void blk_queue_free_tags(struct request_queue *);
-extern int blk_queue_resize_tags(struct request_queue *, int);
-extern struct blk_queue_tag *blk_init_tags(int, int);
-extern void blk_free_tags(struct blk_queue_tag *);
-
-static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
- int tag)
-{
- if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
- return NULL;
- return bqt->tag_index[tag];
-}
-
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
@@ -1982,4 +1771,17 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
#endif /* CONFIG_BLOCK */
+static inline void blk_wake_io_task(struct task_struct *waiter)
+{
+ /*
+ * If we're polling, the task itself is doing the completions. For
+ * that case, we don't need to signal a wakeup, it's enough to just
+ * mark us as RUNNING.
+ *
+ * Any other waiter is a different task than the caller, so it is
+ * woken through wake_up_process() instead.
+ */
+ if (waiter == current)
+ __set_current_state(TASK_RUNNING);
+ else
+ wake_up_process(waiter);
+}
+
#endif
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 6aeaf6472665..b356e0006731 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -31,6 +31,9 @@ struct device;
struct scatterlist;
struct request_queue;
+typedef int (bsg_job_fn) (struct bsg_job *);
+typedef enum blk_eh_timer_return (bsg_timeout_fn)(struct request *);
+
struct bsg_buffer {
unsigned int payload_len;
int sg_cnt;
@@ -72,7 +75,8 @@ struct bsg_job {
void bsg_job_done(struct bsg_job *job, int result,
unsigned int reply_payload_rcv_len);
struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
- bsg_job_fn *job_fn, int dd_job_size);
+ bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size);
+void bsg_remove_queue(struct request_queue *q);
void bsg_job_put(struct bsg_job *job);
int __must_check bsg_job_get(struct bsg_job *job);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9d12757a65b0..9968332cceed 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,6 +93,8 @@ extern struct css_set init_css_set;
bool css_has_online_children(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
+struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
+ struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
struct cgroup_subsys *ss);
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 015bb59c0331..2e9e2763bf47 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -23,74 +23,6 @@ enum elv_merge {
ELEVATOR_DISCARD_MERGE = 3,
};
-typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **,
- struct bio *);
-
-typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);
-
-typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge);
-
-typedef int (elevator_allow_bio_merge_fn) (struct request_queue *,
- struct request *, struct bio *);
-
-typedef int (elevator_allow_rq_merge_fn) (struct request_queue *,
- struct request *, struct request *);
-
-typedef void (elevator_bio_merged_fn) (struct request_queue *,
- struct request *, struct bio *);
-
-typedef int (elevator_dispatch_fn) (struct request_queue *, int);
-
-typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
-typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
-typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
-typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int);
-
-typedef void (elevator_init_icq_fn) (struct io_cq *);
-typedef void (elevator_exit_icq_fn) (struct io_cq *);
-typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
- struct bio *, gfp_t);
-typedef void (elevator_put_req_fn) (struct request *);
-typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
-typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
-
-typedef int (elevator_init_fn) (struct request_queue *,
- struct elevator_type *e);
-typedef void (elevator_exit_fn) (struct elevator_queue *);
-typedef void (elevator_registered_fn) (struct request_queue *);
-
-struct elevator_ops
-{
- elevator_merge_fn *elevator_merge_fn;
- elevator_merged_fn *elevator_merged_fn;
- elevator_merge_req_fn *elevator_merge_req_fn;
- elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn;
- elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn;
- elevator_bio_merged_fn *elevator_bio_merged_fn;
-
- elevator_dispatch_fn *elevator_dispatch_fn;
- elevator_add_req_fn *elevator_add_req_fn;
- elevator_activate_req_fn *elevator_activate_req_fn;
- elevator_deactivate_req_fn *elevator_deactivate_req_fn;
-
- elevator_completed_req_fn *elevator_completed_req_fn;
-
- elevator_request_list_fn *elevator_former_req_fn;
- elevator_request_list_fn *elevator_latter_req_fn;
-
- elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */
- elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */
-
- elevator_set_req_fn *elevator_set_req_fn;
- elevator_put_req_fn *elevator_put_req_fn;
-
- elevator_may_queue_fn *elevator_may_queue_fn;
-
- elevator_init_fn *elevator_init_fn;
- elevator_exit_fn *elevator_exit_fn;
- elevator_registered_fn *elevator_registered_fn;
-};
-
struct blk_mq_alloc_data;
struct blk_mq_hw_ctx;
@@ -137,17 +69,14 @@ struct elevator_type
struct kmem_cache *icq_cache;
/* fields provided by elevator implementation */
- union {
- struct elevator_ops sq;
- struct elevator_mq_ops mq;
- } ops;
+ struct elevator_mq_ops ops;
+
size_t icq_size; /* see iocontext.h */
size_t icq_align; /* ditto */
struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX];
const char *elevator_alias;
struct module *elevator_owner;
- bool uses_mq;
#ifdef CONFIG_BLK_DEBUG_FS
const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
const struct blk_mq_debugfs_attr *hctx_debugfs_attrs;
@@ -175,40 +104,25 @@ struct elevator_queue
struct kobject kobj;
struct mutex sysfs_lock;
unsigned int registered:1;
- unsigned int uses_mq:1;
DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
};
/*
* block elevator interface
*/
-extern void elv_dispatch_sort(struct request_queue *, struct request *);
-extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
-extern void elv_add_request(struct request_queue *, struct request *, int);
-extern void __elv_add_request(struct request_queue *, struct request *, int);
extern enum elv_merge elv_merge(struct request_queue *, struct request **,
struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
struct request *);
extern void elv_merged_request(struct request_queue *, struct request *,
enum elv_merge);
-extern void elv_bio_merged(struct request_queue *q, struct request *,
- struct bio *);
extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
-extern void elv_requeue_request(struct request_queue *, struct request *);
extern struct request *elv_former_request(struct request_queue *, struct request *);
extern struct request *elv_latter_request(struct request_queue *, struct request *);
-extern int elv_may_queue(struct request_queue *, unsigned int);
-extern void elv_completed_request(struct request_queue *, struct request *);
-extern int elv_set_request(struct request_queue *q, struct request *rq,
- struct bio *bio, gfp_t gfp_mask);
-extern void elv_put_request(struct request_queue *, struct request *);
-extern void elv_drain_elevator(struct request_queue *);
/*
* io scheduler registration
*/
-extern void __init load_default_elevator_module(void);
extern int elv_register(struct elevator_type *);
extern void elv_unregister(struct elevator_type *);
@@ -260,9 +174,5 @@ enum {
#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist)
-#else /* CONFIG_BLOCK */
-
-static inline void load_default_elevator_module(void) { }
-
#endif /* CONFIG_BLOCK */
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 26a8607b3c3c..6d52ce6af4ff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2026,7 +2026,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
.ki_filp = filp,
.ki_flags = iocb_flags(filp),
.ki_hint = ki_hint_validate(file_write_hint(filp)),
- .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0),
+ .ki_ioprio = get_current_ioprio(),
};
}
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 70fc838e6773..06c0fd594097 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -17,6 +17,7 @@
#include <linux/percpu-refcount.h>
#include <linux/uuid.h>
#include <linux/blk_types.h>
+#include <asm/local.h>
#ifdef CONFIG_BLOCK
@@ -89,6 +90,7 @@ struct disk_stats {
unsigned long merges[NR_STAT_GROUPS];
unsigned long io_ticks;
unsigned long time_in_queue;
+ local_t in_flight[2];
};
#define PARTITION_META_INFO_VOLNAMELTH 64
@@ -122,14 +124,13 @@ struct hd_struct {
int make_it_fail;
#endif
unsigned long stamp;
- atomic_t in_flight[2];
#ifdef CONFIG_SMP
struct disk_stats __percpu *dkstats;
#else
struct disk_stats dkstats;
#endif
struct percpu_ref ref;
- struct rcu_head rcu_head;
+ struct rcu_work rcu_work;
};
#define GENHD_FL_REMOVABLE 1
@@ -295,8 +296,11 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)
-#define __part_stat_add(cpu, part, field, addnd) \
- (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd))
+#define part_stat_get_cpu(part, field, cpu) \
+ (per_cpu_ptr((part)->dkstats, (cpu))->field)
+
+#define part_stat_get(part, field) \
+ part_stat_get_cpu(part, field, smp_processor_id())
#define part_stat_read(part, field) \
({ \
@@ -333,10 +337,9 @@ static inline void free_part_stats(struct hd_struct *part)
#define part_stat_lock() ({ rcu_read_lock(); 0; })
#define part_stat_unlock() rcu_read_unlock()
-#define __part_stat_add(cpu, part, field, addnd) \
- ((part)->dkstats.field += addnd)
-
-#define part_stat_read(part, field) ((part)->dkstats.field)
+#define part_stat_get(part, field) ((part)->dkstats.field)
+#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field)
+#define part_stat_read(part, field) part_stat_get(part, field)
static inline void part_stat_set_all(struct hd_struct *part, int value)
{
@@ -362,22 +365,33 @@ static inline void free_part_stats(struct hd_struct *part)
part_stat_read(part, field[STAT_WRITE]) + \
part_stat_read(part, field[STAT_DISCARD]))
-#define part_stat_add(cpu, part, field, addnd) do { \
- __part_stat_add((cpu), (part), field, addnd); \
+#define __part_stat_add(part, field, addnd) \
+ (part_stat_get(part, field) += (addnd))
+
+#define part_stat_add(part, field, addnd) do { \
+ __part_stat_add((part), field, addnd); \
if ((part)->partno) \
- __part_stat_add((cpu), &part_to_disk((part))->part0, \
+ __part_stat_add(&part_to_disk((part))->part0, \
field, addnd); \
} while (0)
-#define part_stat_dec(cpu, gendiskp, field) \
- part_stat_add(cpu, gendiskp, field, -1)
-#define part_stat_inc(cpu, gendiskp, field) \
- part_stat_add(cpu, gendiskp, field, 1)
-#define part_stat_sub(cpu, gendiskp, field, subnd) \
- part_stat_add(cpu, gendiskp, field, -subnd)
-
-void part_in_flight(struct request_queue *q, struct hd_struct *part,
- unsigned int inflight[2]);
+#define part_stat_dec(gendiskp, field) \
+ part_stat_add(gendiskp, field, -1)
+#define part_stat_inc(gendiskp, field) \
+ part_stat_add(gendiskp, field, 1)
+#define part_stat_sub(gendiskp, field, subnd) \
+ part_stat_add(gendiskp, field, -subnd)
+
+#define part_stat_local_dec(gendiskp, field) \
+ local_dec(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_inc(gendiskp, field) \
+ local_inc(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_read(gendiskp, field) \
+ local_read(&(part_stat_get(gendiskp, field)))
+#define part_stat_local_read_cpu(gendiskp, field, cpu) \
+ local_read(&(part_stat_get_cpu(gendiskp, field, cpu)))
+
+unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part);
void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2]);
void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
@@ -398,8 +412,7 @@ static inline void free_part_info(struct hd_struct *part)
kfree(part->info);
}
-/* block/blk-core.c */
-extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part);
+void update_io_ticks(struct hd_struct *part, unsigned long now);
/* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk,
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c74b0321922a..e7d29ae633cd 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -10,7 +10,7 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/ata.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
@@ -50,6 +50,7 @@ struct ide_request {
struct scsi_request sreq;
u8 sense[SCSI_SENSE_BUFFERSIZE];
u8 type;
+ void *special;
};
static inline struct ide_request *ide_req(struct request *rq)
@@ -529,6 +530,10 @@ struct ide_drive_s {
struct request_queue *queue; /* request queue */
+ bool (*prep_rq)(struct ide_drive_s *, struct request *);
+
+ struct blk_mq_tag_set tag_set;
+
struct request *rq; /* current request */
void *driver_data; /* extra driver data */
u16 *id; /* identification info */
@@ -612,6 +617,10 @@ struct ide_drive_s {
bool sense_rq_armed;
struct request *sense_rq;
struct request_sense sense_data;
+
+ /* async sense insertion */
+ struct work_struct rq_work;
+ struct list_head rq_list;
};
typedef struct ide_drive_s ide_drive_t;
@@ -1089,6 +1098,7 @@ extern int ide_pci_clk;
int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
void ide_kill_rq(ide_drive_t *, struct request *);
+void ide_insert_request_head(ide_drive_t *, struct request *);
void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
void ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1208,7 +1218,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
extern void ide_timer_expiry(struct timer_list *t);
extern irqreturn_t ide_intr(int irq, void *dev_id);
-extern void do_ide_request(struct request_queue *);
+extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq);
void ide_init_disk(struct gendisk *, ide_drive_t *);
diff --git a/include/linux/init.h b/include/linux/init.h
index 9c2aba1dbabf..5255069f5a9f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -146,7 +146,6 @@ extern unsigned int reset_devices;
/* used by init/main.c */
void setup_arch(char **);
void prepare_namespace(void);
-void __init load_default_modules(void);
int __init init_rootfs(void);
#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 9e30ed6443db..e9bfe6972aed 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -71,6 +71,19 @@ static inline int task_nice_ioclass(struct task_struct *task)
}
/*
+ * If the calling process has set an I/O priority, use that. Otherwise, return
+ * the default I/O priority.
+ *
+ * The priority is taken from current->io_context. A task without an
+ * io_context gets IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0).
+ */
+static inline int get_current_ioprio(void)
+{
+ struct io_context *ioc = current->io_context;
+
+ if (ioc)
+ return ioc->ioprio;
+ return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+}
+
+/*
* For inheritance, return the highest of the two given priorities
*/
extern int ioprio_best(unsigned short aprio, unsigned short bprio);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 2fdeac1a420d..5d865a5d5cdc 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
struct nvm_chk_meta *);
typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
typedef void (nvm_destroy_dma_pool_fn)(void *);
typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
dma_addr_t *);
@@ -357,6 +357,7 @@ struct nvm_geo {
u32 clba; /* sectors per chunk */
u16 csecs; /* sector size */
u16 sos; /* out-of-band area size */
+ bool ext; /* metadata in extended data buffer */
/* device write constraints */
u32 ws_min; /* minimum write size */
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 496ff759f84c..91745cc3704c 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -403,7 +403,6 @@ struct nvme_fc_port_template {
void **handle);
void (*delete_queue)(struct nvme_fc_local_port *,
unsigned int qidx, void *handle);
- void (*poll_queue)(struct nvme_fc_local_port *, void *handle);
int (*ls_req)(struct nvme_fc_local_port *,
struct nvme_fc_remote_port *,
struct nvmefc_ls_req *);
@@ -649,22 +648,6 @@ enum {
* sequence in one LLDD operation. Errors during Data
* sequence transmit must not allow RSP sequence to be sent.
*/
- NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1),
- /* Bit 2: When 0, the LLDD is calling the cmd rcv handler
- * in a non-isr context, allowing the transport to finish
- * op completion in the calling context. When 1, the LLDD
- * is calling the cmd rcv handler in an ISR context,
- * requiring the transport to transition to a workqueue
- * for op completion.
- */
- NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2),
- /* Bit 3: When 0, the LLDD is calling the op done handler
- * in a non-isr context, allowing the transport to finish
- * op completion in the calling context. When 1, the LLDD
- * is calling the op done handler in an ISR context,
- * requiring the transport to transition to a workqueue
- * for op completion.
- */
};
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
new file mode 100644
index 000000000000..03d87c0550a9
--- /dev/null
+++ b/include/linux/nvme-tcp.h
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics TCP protocol header.
+ * Copyright (c) 2018 Lightbits Labs. All rights reserved.
+ */
+
+#ifndef _LINUX_NVME_TCP_H
+#define _LINUX_NVME_TCP_H
+
+#include <linux/nvme.h>
+
+#define NVME_TCP_DISC_PORT 8009
+#define NVME_TCP_ADMIN_CCSZ SZ_8K
+#define NVME_TCP_DIGEST_LENGTH 4
+
+enum nvme_tcp_pfv {
+ NVME_TCP_PFV_1_0 = 0x0,
+};
+
+enum nvme_tcp_fatal_error_status {
+ NVME_TCP_FES_INVALID_PDU_HDR = 0x01,
+ NVME_TCP_FES_PDU_SEQ_ERR = 0x02,
+ NVME_TCP_FES_HDR_DIGEST_ERR = 0x03,
+ NVME_TCP_FES_DATA_OUT_OF_RANGE = 0x04,
+ NVME_TCP_FES_R2T_LIMIT_EXCEEDED = 0x05,
+ NVME_TCP_FES_DATA_LIMIT_EXCEEDED = 0x05,
+ NVME_TCP_FES_UNSUPPORTED_PARAM = 0x06,
+};
+
+enum nvme_tcp_digest_option {
+ NVME_TCP_HDR_DIGEST_ENABLE = (1 << 0),
+ NVME_TCP_DATA_DIGEST_ENABLE = (1 << 1),
+};
+
+enum nvme_tcp_pdu_type {
+ nvme_tcp_icreq = 0x0,
+ nvme_tcp_icresp = 0x1,
+ nvme_tcp_h2c_term = 0x2,
+ nvme_tcp_c2h_term = 0x3,
+ nvme_tcp_cmd = 0x4,
+ nvme_tcp_rsp = 0x5,
+ nvme_tcp_h2c_data = 0x6,
+ nvme_tcp_c2h_data = 0x7,
+ nvme_tcp_r2t = 0x9,
+};
+
+enum nvme_tcp_pdu_flags {
+ NVME_TCP_F_HDGST = (1 << 0),
+ NVME_TCP_F_DDGST = (1 << 1),
+ NVME_TCP_F_DATA_LAST = (1 << 2),
+ NVME_TCP_F_DATA_SUCCESS = (1 << 3),
+};
+
+/**
+ * struct nvme_tcp_hdr - nvme tcp pdu common header
+ *
+ * @type: pdu type
+ * @flags: pdu specific flags
+ * @hlen: pdu header length
+ * @pdo: pdu data offset
+ * @plen: pdu wire byte length
+ */
+struct nvme_tcp_hdr {
+ __u8 type;
+ __u8 flags;
+ __u8 hlen;
+ __u8 pdo;
+ __le32 plen;
+};
+
+/**
+ * struct nvme_tcp_icreq_pdu - nvme tcp initialize connection request pdu
+ *
+ * @hdr: pdu generic header
+ * @pfv: pdu format version
+ * @hpda: host pdu data alignment (dwords, 0's based)
+ * @digest: digest types enabled
+ * @maxr2t: maximum r2ts per request supported
+ */
+struct nvme_tcp_icreq_pdu {
+ struct nvme_tcp_hdr hdr;
+ __le16 pfv;
+ __u8 hpda;
+ __u8 digest;
+ __le32 maxr2t;
+ __u8 rsvd2[112];
+};
+
+/**
+ * struct nvme_tcp_icresp_pdu - nvme tcp initialize connection response pdu
+ *
+ * @hdr: pdu common header
+ * @pfv: pdu format version
+ * @cpda: controller pdu data alignment (dwords, 0's based)
+ * @digest: digest types enabled
+ * @maxdata: maximum data capsules per r2t supported
+ */
+struct nvme_tcp_icresp_pdu {
+ struct nvme_tcp_hdr hdr;
+ __le16 pfv;
+ __u8 cpda;
+ __u8 digest;
+ __le32 maxdata;
+ __u8 rsvd[112];
+};
+
+/**
+ * struct nvme_tcp_term_pdu - nvme tcp terminate connection pdu
+ *
+ * @hdr: pdu common header
+ * @fes: fatal error status
+ * @fei: fatal error information
+ */
+struct nvme_tcp_term_pdu {
+ struct nvme_tcp_hdr hdr;
+ __le16 fes;
+ __le32 fei;
+ __u8 rsvd[8];
+};
+
+/**
+ * struct nvme_tcp_cmd_pdu - nvme tcp command capsule pdu
+ *
+ * @hdr: pdu common header
+ * @cmd: nvme command
+ */
+struct nvme_tcp_cmd_pdu {
+ struct nvme_tcp_hdr hdr;
+ struct nvme_command cmd;
+};
+
+/**
+ * struct nvme_tcp_rsp_pdu - nvme tcp response capsule pdu
+ *
+ * @hdr: pdu common header, i.e. the nvme-tcp generic header
+ *       (struct nvme_tcp_hdr)
+ * @cqe: nvme completion queue entry
+ */
+struct nvme_tcp_rsp_pdu {
+ struct nvme_tcp_hdr hdr;
+ struct nvme_completion cqe;
+};
+
+/**
+ * struct nvme_tcp_r2t_pdu - nvme tcp ready-to-transfer pdu
+ *
+ * @hdr: pdu common header
+ * @command_id: nvme command identifier which this relates to
+ * @ttag: transfer tag (controller generated)
+ * @r2t_offset: offset from the start of the command data
+ * @r2t_length: length the host is allowed to send
+ */
+struct nvme_tcp_r2t_pdu {
+ struct nvme_tcp_hdr hdr;
+ __u16 command_id;
+ __u16 ttag;
+ __le32 r2t_offset;
+ __le32 r2t_length;
+ __u8 rsvd[4];
+};
+
+/**
+ * struct nvme_tcp_data_pdu - nvme tcp data pdu
+ *
+ * @hdr: pdu common header
+ * @command_id: nvme command identifier which this relates to
+ * @ttag: transfer tag (controller generated)
+ * @data_offset: offset from the start of the command data
+ * @data_length: length of the data stream
+ */
+struct nvme_tcp_data_pdu {
+ struct nvme_tcp_hdr hdr;
+ __u16 command_id;
+ __u16 ttag;
+ __le32 data_offset;
+ __le32 data_length;
+ __u8 rsvd[4];
+};
+
+union nvme_tcp_pdu {
+ struct nvme_tcp_icreq_pdu icreq;
+ struct nvme_tcp_icresp_pdu icresp;
+ struct nvme_tcp_cmd_pdu cmd;
+ struct nvme_tcp_rsp_pdu rsp;
+ struct nvme_tcp_r2t_pdu r2t;
+ struct nvme_tcp_data_pdu data;
+};
+
+#endif /* _LINUX_NVME_TCP_H */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 818dbe9331be..bbcc83886899 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -52,15 +52,20 @@ enum {
enum {
NVMF_TRTYPE_RDMA = 1, /* RDMA */
NVMF_TRTYPE_FC = 2, /* Fibre Channel */
+ NVMF_TRTYPE_TCP = 3, /* TCP/IP */
NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */
NVMF_TRTYPE_MAX,
};
/* Transport Requirements codes for Discovery Log Page entry TREQ field */
enum {
- NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */
- NVMF_TREQ_REQUIRED = 1, /* Required */
- NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */
+ NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */
+ NVMF_TREQ_REQUIRED = 1, /* Required */
+ NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */
+#define NVME_TREQ_SECURE_CHANNEL_MASK \
+ (NVMF_TREQ_REQUIRED | NVMF_TREQ_NOT_REQUIRED)
+
+ NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* Supports SQ flow control disable */
};
/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
@@ -198,6 +203,11 @@ enum {
NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
};
+enum nvme_ctrl_attr {
+ NVME_CTRL_ATTR_HID_128_BIT = (1 << 0),
+ NVME_CTRL_ATTR_TBKAS = (1 << 6),
+};
+
struct nvme_id_ctrl {
__le16 vid;
__le16 ssvid;
@@ -214,7 +224,11 @@ struct nvme_id_ctrl {
__le32 rtd3e;
__le32 oaes;
__le32 ctratt;
- __u8 rsvd100[156];
+ __u8 rsvd100[28];
+ __le16 crdt1;
+ __le16 crdt2;
+ __le16 crdt3;
+ __u8 rsvd134[122];
__le16 oacs;
__u8 acl;
__u8 aerl;
@@ -481,12 +495,21 @@ enum {
NVME_AER_NOTICE_NS_CHANGED = 0x00,
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
NVME_AER_NOTICE_ANA = 0x03,
+ NVME_AER_NOTICE_DISC_CHANGED = 0xf0,
};
enum {
- NVME_AEN_CFG_NS_ATTR = 1 << 8,
- NVME_AEN_CFG_FW_ACT = 1 << 9,
- NVME_AEN_CFG_ANA_CHANGE = 1 << 11,
+ NVME_AEN_BIT_NS_ATTR = 8,
+ NVME_AEN_BIT_FW_ACT = 9,
+ NVME_AEN_BIT_ANA_CHANGE = 11,
+ NVME_AEN_BIT_DISC_CHANGE = 31,
+};
+
+enum {
+ NVME_AEN_CFG_NS_ATTR = 1 << NVME_AEN_BIT_NS_ATTR,
+ NVME_AEN_CFG_FW_ACT = 1 << NVME_AEN_BIT_FW_ACT,
+ NVME_AEN_CFG_ANA_CHANGE = 1 << NVME_AEN_BIT_ANA_CHANGE,
+ NVME_AEN_CFG_DISC_CHANGE = 1 << NVME_AEN_BIT_DISC_CHANGE,
};
struct nvme_lba_range_type {
@@ -639,7 +662,12 @@ struct nvme_common_command {
__le32 cdw2[2];
__le64 metadata;
union nvme_data_ptr dptr;
- __le32 cdw10[6];
+ __le32 cdw10;
+ __le32 cdw11;
+ __le32 cdw12;
+ __le32 cdw13;
+ __le32 cdw14;
+ __le32 cdw15;
};
struct nvme_rw_command {
@@ -738,6 +766,15 @@ enum {
NVME_HOST_MEM_RETURN = (1 << 1),
};
+struct nvme_feat_host_behavior {
+ __u8 acre;
+ __u8 resv1[511];
+};
+
+enum {
+ NVME_ENABLE_ACRE = 1,
+};
+
/* Admin commands */
enum nvme_admin_opcode {
@@ -792,6 +829,7 @@ enum {
NVME_FEAT_RRL = 0x12,
NVME_FEAT_PLM_CONFIG = 0x13,
NVME_FEAT_PLM_WINDOW = 0x14,
+ NVME_FEAT_HOST_BEHAVIOR = 0x16,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
@@ -1030,6 +1068,10 @@ struct nvmf_disc_rsp_page_hdr {
struct nvmf_disc_rsp_page_entry entries[0];
};
+enum {
+ NVME_CONNECT_DISABLE_SQFLOW = (1 << 2),
+};
+
struct nvmf_connect_command {
__u8 opcode;
__u8 resv1;
@@ -1126,6 +1168,20 @@ struct nvme_command {
};
};
+struct nvme_error_slot {
+ __le64 error_count;
+ __le16 sqid;
+ __le16 cmdid;
+ __le16 status_field;
+ __le16 param_error_location;
+ __le64 lba;
+ __le32 nsid;
+ __u8 vs;
+ __u8 resv[3];
+ __le64 cs;
+ __u8 resv2[24];
+};
+
static inline bool nvme_is_write(struct nvme_command *cmd)
{
/*
@@ -1243,6 +1299,7 @@ enum {
NVME_SC_ANA_TRANSITION = 0x303,
NVME_SC_HOST_PATH_ERROR = 0x370,
+ NVME_SC_CRD = 0x1800,
NVME_SC_DNR = 0x4000,
};
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 804a50983ec5..14d558146aea 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -30,14 +30,24 @@ struct seq_file;
*/
struct sbitmap_word {
/**
- * @word: The bitmap word itself.
+ * @depth: Number of bits being used in @word/@cleared
*/
- unsigned long word;
+ unsigned long depth;
/**
- * @depth: Number of bits being used in @word.
+ * @word: word holding free bits
*/
- unsigned long depth;
+ unsigned long word ____cacheline_aligned_in_smp;
+
+ /**
+ * @cleared: word holding cleared bits
+ */
+ unsigned long cleared ____cacheline_aligned_in_smp;
+
+ /**
+ * @swap_lock: Held while swapping word <-> cleared
+ */
+ spinlock_t swap_lock;
} ____cacheline_aligned_in_smp;
/**
@@ -125,6 +135,11 @@ struct sbitmap_queue {
*/
struct sbq_wait_state *ws;
+ /**
+ * @ws_active: count of currently active ws waitqueues
+ */
+ atomic_t ws_active;
+
/**
* @round_robin: Allocate bits in strict round-robin order.
*/
@@ -250,12 +265,14 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb,
nr = SB_NR_TO_BIT(sb, start);
while (scanned < sb->depth) {
- struct sbitmap_word *word = &sb->map[index];
- unsigned int depth = min_t(unsigned int, word->depth - nr,
+ unsigned long word;
+ unsigned int depth = min_t(unsigned int,
+ sb->map[index].depth - nr,
sb->depth - scanned);
scanned += depth;
- if (!word->word)
+ word = sb->map[index].word & ~sb->map[index].cleared;
+ if (!word)
goto next;
/*
@@ -265,7 +282,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb,
*/
depth += nr;
while (1) {
- nr = find_next_bit(&word->word, depth, nr);
+ nr = find_next_bit(&word, depth, nr);
if (nr >= depth)
break;
if (!fn(sb, (index << sb->shift) + nr, data))
@@ -310,6 +327,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}
+/*
+ * This one is special, since it doesn't actually clear the bit, rather it
+ * sets the corresponding bit in the ->cleared mask instead. Paired with
+ * the caller doing sbitmap_deferred_clear() if a given index is full, which
+ * will clear the previously freed entries in the corresponding ->word.
+ */
+static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
+{
+ unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;
+
+ set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
+}
+
static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
unsigned int bitnr)
{
@@ -321,8 +351,6 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}
-unsigned int sbitmap_weight(const struct sbitmap *sb);
-
/**
* sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
* @sb: Bitmap to show.
@@ -531,4 +559,45 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
*/
void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
+struct sbq_wait {
+ struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */
+ struct wait_queue_entry wait;
+};
+
+#define DEFINE_SBQ_WAIT(name) \
+ struct sbq_wait name = { \
+ .sbq = NULL, \
+ .wait = { \
+ .private = current, \
+ .func = autoremove_wake_function, \
+ .entry = LIST_HEAD_INIT((name).wait.entry), \
+ } \
+ }
+
+/*
+ * Wrapper around prepare_to_wait_exclusive(), which maintains some extra
+ * internal state.
+ */
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+ struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait, int state);
+
+/*
+ * Must be paired with sbitmap_prepare_to_wait().
+ */
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait);
+
+/*
+ * Wrapper around add_wait_queue(), which maintains some extra internal state
+ */
+void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
+ struct sbq_wait_state *ws,
+ struct sbq_wait *sbq_wait);
+
+/*
+ * Must be paired with sbitmap_add_wait_queue()
+ */
+void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait);
+
#endif /* __LINUX_SCALE_BITMAP_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2a57a365c711..93f56fddd92a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3339,6 +3339,9 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
}
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
struct msghdr *msg);
+int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len,
+ struct ahash_request *hash);
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
struct iov_iter *from, int len);
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 55ce99ddb912..ecf584f6b82d 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/thread_info.h>
+#include <crypto/hash.h>
#include <uapi/linux/uio.h>
struct page;
@@ -266,9 +267,11 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
{
i->count = count;
}
-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i);
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
+ struct iov_iter *i);
int import_iovec(int type, const struct iovec __user * uvector,
unsigned nr_segs, unsigned fast_segs,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fdfd04e348f6..738a0c24874f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
*
* @bio is a part of the writeback in progress controlled by @wbc. Perform
* writeback specific initialization. This is used to apply the cgroup
- * writeback context.
+ * writeback context. Must be called after the bio has been associated with
+ * a device.
*/
static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
{
@@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
* regular writeback instead of writing things out itself.
*/
if (wbc->wb)
- bio_associate_blkcg(bio, wbc->wb->blkcg_css);
+ bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
}
#else /* CONFIG_CGROUP_WRITEBACK */
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index c891ada3c5c2..d85e6befa26b 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -61,6 +61,9 @@ struct scsi_pointer {
/* flags preserved across unprep / reprep */
#define SCMD_PRESERVED_FLAGS (SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED)
+/* for scmd->state */
+#define SCMD_STATE_COMPLETE 0
+
struct scsi_cmnd {
struct scsi_request req;
struct scsi_device *device;
@@ -145,6 +148,7 @@ struct scsi_cmnd {
int result; /* Status code from lower level driver */
int flags; /* Command flags */
+ unsigned long state; /* Command completion state */
unsigned char tag; /* SCSI-II queued command tag */
};
@@ -171,7 +175,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
size_t *offset, size_t *len);
extern void scsi_kunmap_atomic_sg(void *virt);
-extern int scsi_init_io(struct scsi_cmnd *cmd);
+extern blk_status_t scsi_init_io(struct scsi_cmnd *cmd);
#ifdef CONFIG_SCSI_DMA
extern int scsi_dma_map(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h
index c7bba2b24849..a862dc23c68d 100644
--- a/include/scsi/scsi_dh.h
+++ b/include/scsi/scsi_dh.h
@@ -69,7 +69,7 @@ struct scsi_device_handler {
int (*attach)(struct scsi_device *);
void (*detach)(struct scsi_device *);
int (*activate)(struct scsi_device *, activate_complete, void *);
- int (*prep_fn)(struct scsi_device *, struct request *);
+ blk_status_t (*prep_fn)(struct scsi_device *, struct request *);
int (*set_params)(struct scsi_device *, const char *);
void (*rescan)(struct scsi_device *);
};
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index fae8b465233e..6dffa8555a39 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -2,6 +2,7 @@
#ifndef _SCSI_SCSI_DRIVER_H
#define _SCSI_SCSI_DRIVER_H
+#include <linux/blk_types.h>
#include <linux/device.h>
struct module;
@@ -13,7 +14,7 @@ struct scsi_driver {
struct device_driver gendrv;
void (*rescan)(struct device *);
- int (*init_command)(struct scsi_cmnd *);
+ blk_status_t (*init_command)(struct scsi_cmnd *);
void (*uninit_command)(struct scsi_cmnd *);
int (*done)(struct scsi_cmnd *);
int (*eh_action)(struct scsi_cmnd *, int);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 5ea06d310a25..aa760df8c6b3 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -11,7 +11,6 @@
#include <linux/blk-mq.h>
#include <scsi/scsi.h>
-struct request_queue;
struct block_device;
struct completion;
struct module;
@@ -22,7 +21,6 @@ struct scsi_target;
struct Scsi_Host;
struct scsi_host_cmd_pool;
struct scsi_transport_template;
-struct blk_queue_tags;
/*
@@ -547,14 +545,8 @@ struct Scsi_Host {
struct scsi_host_template *hostt;
struct scsi_transport_template *transportt;
- /*
- * Area to keep a shared tag map (if needed, will be
- * NULL if not).
- */
- union {
- struct blk_queue_tag *bqt;
- struct blk_mq_tag_set tag_set;
- };
+ /* Area to keep a shared tag map */
+ struct blk_mq_tag_set tag_set;
atomic_t host_busy; /* commands actually active on low-level */
atomic_t host_blocked;
@@ -648,7 +640,6 @@ struct Scsi_Host {
/* The controller does not support WRITE SAME */
unsigned no_write_same:1;
- unsigned use_blk_mq:1;
unsigned use_cmd_list:1;
/* Host responded with short (<36 bytes) INQUIRY result */
@@ -742,11 +733,6 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost)
shost->tmf_in_progress;
}
-static inline bool shost_use_blk_mq(struct Scsi_Host *shost)
-{
- return shost->use_blk_mq;
-}
-
extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
extern void scsi_flush_work(struct Scsi_Host *);
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index e192a0caa850..6053d46e794e 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -23,19 +23,15 @@ static inline struct scsi_cmnd *scsi_host_find_tag(struct Scsi_Host *shost,
int tag)
{
struct request *req = NULL;
+ u16 hwq;
if (tag == SCSI_NO_TAG)
return NULL;
- if (shost_use_blk_mq(shost)) {
- u16 hwq = blk_mq_unique_tag_to_hwq(tag);
-
- if (hwq < shost->tag_set.nr_hw_queues) {
- req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq],
- blk_mq_unique_tag_to_tag(tag));
- }
- } else {
- req = blk_map_queue_find_tag(shost->bqt, tag);
+ hwq = blk_mq_unique_tag_to_hwq(tag);
+ if (hwq < shost->tag_set.nr_hw_queues) {
+ req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq],
+ blk_mq_unique_tag_to_tag(tag));
}
if (!req)
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 2cbd6e42ad83..e4526f85c19d 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -221,9 +221,30 @@ DEFINE_EVENT(cache_set, bcache_journal_entry_full,
TP_ARGS(c)
);
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
+TRACE_EVENT(bcache_journal_write,
+ TP_PROTO(struct bio *bio, u32 keys),
+ TP_ARGS(bio, keys),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(sector_t, sector )
+ __field(unsigned int, nr_sector )
+ __array(char, rwbs, 6 )
+ __field(u32, nr_keys )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio_dev(bio);
+ __entry->sector = bio->bi_iter.bi_sector;
+ __entry->nr_sector = bio->bi_iter.bi_size >> 9;
+ __entry->nr_keys = keys;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u keys %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __entry->nr_keys)
);
/* Btree */
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index ce43d340f010..8387e0af0f76 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -50,6 +50,8 @@ enum {
*
* IOCB_FLAG_RESFD - Set if the "aio_resfd" member of the "struct iocb"
* is valid.
+ * IOCB_FLAG_IOPRIO - Set if the "aio_reqprio" member of the "struct iocb"
+ * is valid.
*/
#define IOCB_FLAG_RESFD (1 << 0)
#define IOCB_FLAG_IOPRIO (1 << 1)