aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-26 15:03:07 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-26 15:03:07 -0700
commitd05d7f40791ccbb6e543cc5dd6a6aa08fc71d635 (patch)
treedc0039fe490a41a70de10d58fe8e6136db46463a /include/linux
parentMerge branch 'for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata (diff)
parentblock: Fix front merge check (diff)
downloadlinux-dev-d05d7f40791ccbb6e543cc5dd6a6aa08fc71d635.tar.xz
linux-dev-d05d7f40791ccbb6e543cc5dd6a6aa08fc71d635.zip
Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe: - the big change is the cleanup from Mike Christie, cleaning up our uses of command types and modified flags. This is what will throw some merge conflicts - regression fix for the above for btrfs, from Vincent - following up to the above, better packing of struct request from Christoph - a 2038 fix for blktrace from Arnd - a few trivial/spelling fixes from Bart Van Assche - a front merge check fix from Damien, which could cause issues on SMR drives - Atari partition fix from Gabriel - convert cfq to highres timers, since jiffies isn't granular enough for some devices these days. From Jan and Jeff - CFQ priority boost fix idle classes, from me - cleanup series from Ming, improving our bio/bvec iteration - a direct issue fix for blk-mq from Omar - fix for plug merging not involving the IO scheduler, like we do for other types of merges. From Tahsin - expose DAX type internally and through sysfs. From Toshi and Yigal * 'for-4.8/core' of git://git.kernel.dk/linux-block: (76 commits) block: Fix front merge check block: do not merge requests without consulting with io scheduler block: Fix spelling in a source code comment block: expose QUEUE_FLAG_DAX in sysfs block: add QUEUE_FLAG_DAX for devices to advertise their DAX support Btrfs: fix comparison in __btrfs_map_block() block: atari: Return early for unsupported sector size Doc: block: Fix a typo in queue-sysfs.txt cfq-iosched: Charge at least 1 jiffie instead of 1 ns cfq-iosched: Fix regression in bonnie++ rewrite performance cfq-iosched: Convert slice_resid from u64 to s64 block: Convert fifo_time from ulong to u64 blktrace: avoid using timespec block/blk-cgroup.c: Declare local symbols static block/bio-integrity.c: Add #include "blk.h" block/partition-generic.c: Remove a set-but-not-used variable block: bio: kill BIO_MAX_SIZE cfq-iosched: temporarily boost queue priority for idle classes block: drbd: avoid to use BIO_MAX_SIZE block: bio: remove BIO_MAX_SECTORS ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bio.h83
-rw-r--r--include/linux/blk-cgroup.h13
-rw-r--r--include/linux/blk_types.h68
-rw-r--r--include/linux/blkdev.h62
-rw-r--r--include/linux/blktrace_api.h2
-rw-r--r--include/linux/buffer_head.h11
-rw-r--r--include/linux/bvec.h96
-rw-r--r--include/linux/dm-io.h3
-rw-r--r--include/linux/elevator.h15
-rw-r--r--include/linux/fs.h43
10 files changed, 235 insertions, 161 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9faebf7f9a33..b7e1a00810f2 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -41,44 +41,9 @@
#endif
#define BIO_MAX_PAGES 256
-#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT)
-#define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9)
-/*
- * upper 16 bits of bi_rw define the io priority of this bio
- */
-#define BIO_PRIO_SHIFT (8 * sizeof(unsigned long) - IOPRIO_BITS)
-#define bio_prio(bio) ((bio)->bi_rw >> BIO_PRIO_SHIFT)
-#define bio_prio_valid(bio) ioprio_valid(bio_prio(bio))
-
-#define bio_set_prio(bio, prio) do { \
- WARN_ON(prio >= (1 << IOPRIO_BITS)); \
- (bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1); \
- (bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT); \
-} while (0)
-
-/*
- * various member access, note that bio_data should of course not be used
- * on highmem page vectors
- */
-#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])
-
-#define bvec_iter_page(bvec, iter) \
- (__bvec_iter_bvec((bvec), (iter))->bv_page)
-
-#define bvec_iter_len(bvec, iter) \
- min((iter).bi_size, \
- __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
-
-#define bvec_iter_offset(bvec, iter) \
- (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
-
-#define bvec_iter_bvec(bvec, iter) \
-((struct bio_vec) { \
- .bv_page = bvec_iter_page((bvec), (iter)), \
- .bv_len = bvec_iter_len((bvec), (iter)), \
- .bv_offset = bvec_iter_offset((bvec), (iter)), \
-})
+#define bio_prio(bio) (bio)->bi_ioprio
+#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio)
#define bio_iter_iovec(bio, iter) \
bvec_iter_bvec((bio)->bi_io_vec, (iter))
@@ -106,18 +71,23 @@ static inline bool bio_has_data(struct bio *bio)
{
if (bio &&
bio->bi_iter.bi_size &&
- !(bio->bi_rw & REQ_DISCARD))
+ bio_op(bio) != REQ_OP_DISCARD)
return true;
return false;
}
+static inline bool bio_no_advance_iter(struct bio *bio)
+{
+ return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_WRITE_SAME;
+}
+
static inline bool bio_is_rw(struct bio *bio)
{
if (!bio_has_data(bio))
return false;
- if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
+ if (bio_no_advance_iter(bio))
return false;
return true;
@@ -193,39 +163,12 @@ static inline void *bio_data(struct bio *bio)
#define bio_for_each_segment_all(bvl, bio, i) \
for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
-static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
- unsigned bytes)
-{
- WARN_ONCE(bytes > iter->bi_size,
- "Attempted to advance past end of bvec iter\n");
-
- while (bytes) {
- unsigned len = min(bytes, bvec_iter_len(bv, *iter));
-
- bytes -= len;
- iter->bi_size -= len;
- iter->bi_bvec_done += len;
-
- if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
- iter->bi_bvec_done = 0;
- iter->bi_idx++;
- }
- }
-}
-
-#define for_each_bvec(bvl, bio_vec, iter, start) \
- for (iter = (start); \
- (iter).bi_size && \
- ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
- bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
-
-
static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
unsigned bytes)
{
iter->bi_sector += bytes >> 9;
- if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
+ if (bio_no_advance_iter(bio))
iter->bi_size -= bytes;
else
bvec_iter_advance(bio->bi_io_vec, iter, bytes);
@@ -253,10 +196,10 @@ static inline unsigned bio_segments(struct bio *bio)
* differently:
*/
- if (bio->bi_rw & REQ_DISCARD)
+ if (bio_op(bio) == REQ_OP_DISCARD)
return 1;
- if (bio->bi_rw & REQ_WRITE_SAME)
+ if (bio_op(bio) == REQ_OP_WRITE_SAME)
return 1;
bio_for_each_segment(bv, bio, iter)
@@ -473,7 +416,7 @@ static inline void bio_io_error(struct bio *bio)
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
-extern int submit_bio_wait(int rw, struct bio *bio);
+extern int submit_bio_wait(struct bio *bio);
extern void bio_advance(struct bio *, unsigned);
extern void bio_init(struct bio *);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index c02e669945e9..f77150a4a96a 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -590,25 +590,26 @@ static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
/**
* blkg_rwstat_add - add a value to a blkg_rwstat
* @rwstat: target blkg_rwstat
- * @rw: mask of REQ_{WRITE|SYNC}
+ * @op: REQ_OP
+ * @op_flags: rq_flag_bits
* @val: value to add
*
* Add @val to @rwstat. The counters are chosen according to @rw. The
* caller is responsible for synchronizing calls to this function.
*/
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
- int rw, uint64_t val)
+ int op, int op_flags, uint64_t val)
{
struct percpu_counter *cnt;
- if (rw & REQ_WRITE)
+ if (op_is_write(op))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
- if (rw & REQ_SYNC)
+ if (op_flags & REQ_SYNC)
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
@@ -713,9 +714,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
if (!throtl) {
blkg = blkg ?: q->root_blkg;
- blkg_rwstat_add(&blkg->stat_bytes, bio->bi_rw,
+ blkg_rwstat_add(&blkg->stat_bytes, bio_op(bio), bio->bi_rw,
bio->bi_iter.bi_size);
- blkg_rwstat_add(&blkg->stat_ios, bio->bi_rw, 1);
+ blkg_rwstat_add(&blkg->stat_ios, bio_op(bio), bio->bi_rw, 1);
}
rcu_read_unlock();
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 77e5d81f07aa..b588e968dc01 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -6,6 +6,7 @@
#define __LINUX_BLK_TYPES_H
#include <linux/types.h>
+#include <linux/bvec.h>
struct bio_set;
struct bio;
@@ -17,28 +18,7 @@ struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
typedef void (bio_destructor_t) (struct bio *);
-/*
- * was unsigned short, but we might as well be ready for > 64kB I/O pages
- */
-struct bio_vec {
- struct page *bv_page;
- unsigned int bv_len;
- unsigned int bv_offset;
-};
-
#ifdef CONFIG_BLOCK
-
-struct bvec_iter {
- sector_t bi_sector; /* device address in 512 byte
- sectors */
- unsigned int bi_size; /* residual I/O count */
-
- unsigned int bi_idx; /* current index into bvl_vec */
-
- unsigned int bi_bvec_done; /* number of bytes completed in
- current bvec */
-};
-
/*
* main unit of I/O for the block layer and lower layers (ie drivers and
* stacking drivers)
@@ -48,9 +28,10 @@ struct bio {
struct block_device *bi_bdev;
unsigned int bi_flags; /* status, command, etc */
int bi_error;
- unsigned long bi_rw; /* bottom bits READ/WRITE,
- * top bits priority
+ unsigned int bi_rw; /* bottom bits req flags,
+ * top bits REQ_OP
*/
+ unsigned short bi_ioprio;
struct bvec_iter bi_iter;
@@ -107,6 +88,16 @@ struct bio {
struct bio_vec bi_inline_vecs[0];
};
+#define BIO_OP_SHIFT (8 * sizeof(unsigned int) - REQ_OP_BITS)
+#define bio_op(bio) ((bio)->bi_rw >> BIO_OP_SHIFT)
+
+#define bio_set_op_attrs(bio, op, op_flags) do { \
+ WARN_ON(op >= (1 << REQ_OP_BITS)); \
+ (bio)->bi_rw &= ((1 << BIO_OP_SHIFT) - 1); \
+ (bio)->bi_rw |= ((unsigned int) (op) << BIO_OP_SHIFT); \
+ (bio)->bi_rw |= op_flags; \
+} while (0)
+
#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
/*
@@ -145,7 +136,6 @@ struct bio {
*/
enum rq_flag_bits {
/* common flags */
- __REQ_WRITE, /* not set, read. set, write */
__REQ_FAILFAST_DEV, /* no driver retries of device errors */
__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
__REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
@@ -153,14 +143,12 @@ enum rq_flag_bits {
__REQ_SYNC, /* request is sync (sync write or read) */
__REQ_META, /* metadata io request */
__REQ_PRIO, /* boost priority in cfq */
- __REQ_DISCARD, /* request to discard sectors */
- __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
- __REQ_WRITE_SAME, /* write same block many times */
+ __REQ_SECURE, /* secure discard (used with REQ_OP_DISCARD) */
__REQ_NOIDLE, /* don't anticipate more IO after this one */
__REQ_INTEGRITY, /* I/O includes block integrity payload */
__REQ_FUA, /* forced unit access */
- __REQ_FLUSH, /* request for cache flush */
+ __REQ_PREFLUSH, /* request for cache flush */
/* bio only flags */
__REQ_RAHEAD, /* read ahead, can fail anytime */
@@ -191,31 +179,25 @@ enum rq_flag_bits {
__REQ_NR_BITS, /* stops here */
};
-#define REQ_WRITE (1ULL << __REQ_WRITE)
#define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC (1ULL << __REQ_SYNC)
#define REQ_META (1ULL << __REQ_META)
#define REQ_PRIO (1ULL << __REQ_PRIO)
-#define REQ_DISCARD (1ULL << __REQ_DISCARD)
-#define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME)
#define REQ_NOIDLE (1ULL << __REQ_NOIDLE)
#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
#define REQ_COMMON_MASK \
- (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
- REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
- REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE)
+ (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \
+ REQ_PREFLUSH | REQ_FUA | REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE)
#define REQ_CLONE_MASK REQ_COMMON_MASK
-#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME)
-
/* This mask is used for both bio and request merge checking */
#define REQ_NOMERGE_FLAGS \
- (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ)
+ (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ)
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_THROTTLED (1ULL << __REQ_THROTTLED)
@@ -233,7 +215,7 @@ enum rq_flag_bits {
#define REQ_PREEMPT (1ULL << __REQ_PREEMPT)
#define REQ_ALLOCED (1ULL << __REQ_ALLOCED)
#define REQ_COPY_USER (1ULL << __REQ_COPY_USER)
-#define REQ_FLUSH (1ULL << __REQ_FLUSH)
+#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH)
#define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ)
#define REQ_IO_STAT (1ULL << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE)
@@ -242,6 +224,16 @@ enum rq_flag_bits {
#define REQ_HASHED (1ULL << __REQ_HASHED)
#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT)
+enum req_op {
+ REQ_OP_READ,
+ REQ_OP_WRITE,
+ REQ_OP_DISCARD, /* request to discard sectors */
+ REQ_OP_WRITE_SAME, /* write same block many times */
+ REQ_OP_FLUSH, /* request for cache flush */
+};
+
+#define REQ_OP_BITS 3
+
typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE -1U
#define BLK_QC_T_SHIFT 16
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3d9cf326574f..48f05d768a53 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -90,18 +90,17 @@ struct request {
struct list_head queuelist;
union {
struct call_single_data csd;
- unsigned long fifo_time;
+ u64 fifo_time;
};
struct request_queue *q;
struct blk_mq_ctx *mq_ctx;
- u64 cmd_flags;
+ int cpu;
unsigned cmd_type;
+ u64 cmd_flags;
unsigned long atomic_flags;
- int cpu;
-
/* the following two fields are internal, NEVER access directly */
unsigned int __data_len; /* total data len */
sector_t __sector; /* sector cursor */
@@ -200,6 +199,20 @@ struct request {
struct request *next_rq;
};
+#define REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS)
+#define req_op(req) ((req)->cmd_flags >> REQ_OP_SHIFT)
+
+#define req_set_op(req, op) do { \
+ WARN_ON(op >= (1 << REQ_OP_BITS)); \
+ (req)->cmd_flags &= ((1ULL << REQ_OP_SHIFT) - 1); \
+ (req)->cmd_flags |= ((u64) (op) << REQ_OP_SHIFT); \
+} while (0)
+
+#define req_set_op_attrs(req, op, flags) do { \
+ req_set_op(req, op); \
+ (req)->cmd_flags |= flags; \
+} while (0)
+
static inline unsigned short req_get_ioprio(struct request *req)
{
return req->ioprio;
@@ -492,6 +505,7 @@ struct request_queue {
#define QUEUE_FLAG_WC 23 /* Write back caching */
#define QUEUE_FLAG_FUA 24 /* device supports FUA writes */
#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
+#define QUEUE_FLAG_DAX 26 /* device supports DAX */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -581,6 +595,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
#define blk_queue_secdiscard(q) (blk_queue_discard(q) && \
test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags))
+#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
#define blk_noretry_request(rq) \
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
@@ -597,7 +612,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
-#define rq_data_dir(rq) ((int)((rq)->cmd_flags & 1))
+#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
/*
* Driver can handle struct request, if it either has an old style
@@ -616,14 +631,14 @@ static inline unsigned int blk_queue_cluster(struct request_queue *q)
/*
* We regard a request as sync, if either a read or a sync write
*/
-static inline bool rw_is_sync(unsigned int rw_flags)
+static inline bool rw_is_sync(int op, unsigned int rw_flags)
{
- return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC);
+ return op == REQ_OP_READ || (rw_flags & REQ_SYNC);
}
static inline bool rq_is_sync(struct request *rq)
{
- return rw_is_sync(rq->cmd_flags);
+ return rw_is_sync(req_op(rq), rq->cmd_flags);
}
static inline bool blk_rl_full(struct request_list *rl, bool sync)
@@ -652,22 +667,25 @@ static inline bool rq_mergeable(struct request *rq)
if (rq->cmd_type != REQ_TYPE_FS)
return false;
+ if (req_op(rq) == REQ_OP_FLUSH)
+ return false;
+
if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
return false;
return true;
}
-static inline bool blk_check_merge_flags(unsigned int flags1,
- unsigned int flags2)
+static inline bool blk_check_merge_flags(unsigned int flags1, unsigned int op1,
+ unsigned int flags2, unsigned int op2)
{
- if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD))
+ if ((op1 == REQ_OP_DISCARD) != (op2 == REQ_OP_DISCARD))
return false;
if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE))
return false;
- if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME))
+ if ((op1 == REQ_OP_WRITE_SAME) != (op2 == REQ_OP_WRITE_SAME))
return false;
return true;
@@ -879,12 +897,12 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
}
static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
- unsigned int cmd_flags)
+ int op)
{
- if (unlikely(cmd_flags & REQ_DISCARD))
+ if (unlikely(op == REQ_OP_DISCARD))
return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
- if (unlikely(cmd_flags & REQ_WRITE_SAME))
+ if (unlikely(op == REQ_OP_WRITE_SAME))
return q->limits.max_write_same_sectors;
return q->limits.max_sectors;
@@ -904,18 +922,19 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
(offset & (q->limits.chunk_sectors - 1));
}
-static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
+static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
+ sector_t offset)
{
struct request_queue *q = rq->q;
if (unlikely(rq->cmd_type != REQ_TYPE_FS))
return q->limits.max_hw_sectors;
- if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
- return blk_queue_get_max_sectors(q, rq->cmd_flags);
+ if (!q->limits.chunk_sectors || (req_op(rq) == REQ_OP_DISCARD))
+ return blk_queue_get_max_sectors(q, req_op(rq));
- return min(blk_max_size_offset(q, blk_rq_pos(rq)),
- blk_queue_get_max_sectors(q, rq->cmd_flags));
+ return min(blk_max_size_offset(q, offset),
+ blk_queue_get_max_sectors(q, req_op(rq)));
}
static inline unsigned int blk_rq_count_bios(struct request *rq)
@@ -1141,7 +1160,8 @@ extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop);
+ sector_t nr_sects, gfp_t gfp_mask, int op_flags,
+ struct bio **biop);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 0f3172b8b225..cceb72f9e29f 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -118,7 +118,7 @@ static inline int blk_cmd_buf_len(struct request *rq)
}
extern void blk_dump_cmd(char *buf, struct request *rq);
-extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
+extern void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes);
#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 7e14e545c4b6..ebbacd14d450 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -187,12 +187,13 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
void unlock_buffer(struct buffer_head *bh);
void __lock_buffer(struct buffer_head *bh);
-void ll_rw_block(int, int, struct buffer_head * bh[]);
+void ll_rw_block(int, int, int, struct buffer_head * bh[]);
int sync_dirty_buffer(struct buffer_head *bh);
-int __sync_dirty_buffer(struct buffer_head *bh, int rw);
-void write_dirty_buffer(struct buffer_head *bh, int rw);
-int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags);
-int submit_bh(int, struct buffer_head *);
+int __sync_dirty_buffer(struct buffer_head *bh, int op_flags);
+void write_dirty_buffer(struct buffer_head *bh, int op_flags);
+int _submit_bh(int op, int op_flags, struct buffer_head *bh,
+ unsigned long bio_flags);
+int submit_bh(int, int, struct buffer_head *);
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize);
int bh_uptodate_or_lock(struct buffer_head *bh);
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
new file mode 100644
index 000000000000..701b64a3b7c5
--- /dev/null
+++ b/include/linux/bvec.h
@@ -0,0 +1,96 @@
+/*
+ * bvec iterator
+ *
+ * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ */
+#ifndef __LINUX_BVEC_ITER_H
+#define __LINUX_BVEC_ITER_H
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+
+/*
+ * was unsigned short, but we might as well be ready for > 64kB I/O pages
+ */
+struct bio_vec {
+ struct page *bv_page;
+ unsigned int bv_len;
+ unsigned int bv_offset;
+};
+
+struct bvec_iter {
+ sector_t bi_sector; /* device address in 512 byte
+ sectors */
+ unsigned int bi_size; /* residual I/O count */
+
+ unsigned int bi_idx; /* current index into bvl_vec */
+
+ unsigned int bi_bvec_done; /* number of bytes completed in
+ current bvec */
+};
+
+/*
+ * various member access, note that bio_data should of course not be used
+ * on highmem page vectors
+ */
+#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])
+
+#define bvec_iter_page(bvec, iter) \
+ (__bvec_iter_bvec((bvec), (iter))->bv_page)
+
+#define bvec_iter_len(bvec, iter) \
+ min((iter).bi_size, \
+ __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
+
+#define bvec_iter_offset(bvec, iter) \
+ (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
+
+#define bvec_iter_bvec(bvec, iter) \
+((struct bio_vec) { \
+ .bv_page = bvec_iter_page((bvec), (iter)), \
+ .bv_len = bvec_iter_len((bvec), (iter)), \
+ .bv_offset = bvec_iter_offset((bvec), (iter)), \
+})
+
+static inline void bvec_iter_advance(const struct bio_vec *bv,
+ struct bvec_iter *iter,
+ unsigned bytes)
+{
+ WARN_ONCE(bytes > iter->bi_size,
+ "Attempted to advance past end of bvec iter\n");
+
+ while (bytes) {
+ unsigned len = min(bytes, bvec_iter_len(bv, *iter));
+
+ bytes -= len;
+ iter->bi_size -= len;
+ iter->bi_bvec_done += len;
+
+ if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+ iter->bi_bvec_done = 0;
+ iter->bi_idx++;
+ }
+ }
+}
+
+#define for_each_bvec(bvl, bio_vec, iter, start) \
+ for (iter = (start); \
+ (iter).bi_size && \
+ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
+ bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
+
+#endif /* __LINUX_BVEC_ITER_H */
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index a68cbe59e6ad..b91b023deffb 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -57,7 +57,8 @@ struct dm_io_notify {
*/
struct dm_io_client;
struct dm_io_request {
- int bi_rw; /* READ|WRITE - not READA */
+ int bi_op; /* REQ_OP */
+ int bi_op_flags; /* rq_flag_bits */
struct dm_io_memory mem; /* Memory to use for io */
struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */
struct dm_io_client *client; /* Client memory handler */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 638b324f0291..e7f358d2e5fc 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -16,7 +16,11 @@ typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *,
typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int);
-typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *);
+typedef int (elevator_allow_bio_merge_fn) (struct request_queue *,
+ struct request *, struct bio *);
+
+typedef int (elevator_allow_rq_merge_fn) (struct request_queue *,
+ struct request *, struct request *);
typedef void (elevator_bio_merged_fn) (struct request_queue *,
struct request *, struct bio *);
@@ -26,7 +30,7 @@ typedef int (elevator_dispatch_fn) (struct request_queue *, int);
typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
-typedef int (elevator_may_queue_fn) (struct request_queue *, int);
+typedef int (elevator_may_queue_fn) (struct request_queue *, int, int);
typedef void (elevator_init_icq_fn) (struct io_cq *);
typedef void (elevator_exit_icq_fn) (struct io_cq *);
@@ -46,7 +50,8 @@ struct elevator_ops
elevator_merge_fn *elevator_merge_fn;
elevator_merged_fn *elevator_merged_fn;
elevator_merge_req_fn *elevator_merge_req_fn;
- elevator_allow_merge_fn *elevator_allow_merge_fn;
+ elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn;
+ elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn;
elevator_bio_merged_fn *elevator_bio_merged_fn;
elevator_dispatch_fn *elevator_dispatch_fn;
@@ -134,7 +139,7 @@ extern struct request *elv_former_request(struct request_queue *, struct request
extern struct request *elv_latter_request(struct request_queue *, struct request *);
extern int elv_register_queue(struct request_queue *q);
extern void elv_unregister_queue(struct request_queue *q);
-extern int elv_may_queue(struct request_queue *, int);
+extern int elv_may_queue(struct request_queue *, int, int);
extern void elv_completed_request(struct request_queue *, struct request *);
extern int elv_set_request(struct request_queue *q, struct request *rq,
struct bio *bio, gfp_t gfp_mask);
@@ -157,7 +162,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
extern int elevator_init(struct request_queue *, char *);
extern void elevator_exit(struct elevator_queue *);
extern int elevator_change(struct request_queue *, const char *);
-extern bool elv_rq_merge_ok(struct request *, struct bio *);
+extern bool elv_bio_merge_ok(struct request *, struct bio *);
extern struct elevator_queue *elevator_alloc(struct request_queue *,
struct elevator_type *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd288148a6b1..183024525d40 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -152,9 +152,10 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define CHECK_IOVEC_ONLY -1
/*
- * The below are the various read and write types that we support. Some of
+ * The below are the various read and write flags that we support. Some of
* them include behavioral modifiers that send information down to the
- * block layer and IO scheduler. Terminology:
+ * block layer and IO scheduler. They should be used along with a req_op.
+ * Terminology:
*
* The block layer uses device plugging to defer IO a little bit, in
* the hope that we will see more IO very shortly. This increases
@@ -193,19 +194,19 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
* non-volatile media on completion.
*
*/
-#define RW_MASK REQ_WRITE
+#define RW_MASK REQ_OP_WRITE
#define RWA_MASK REQ_RAHEAD
-#define READ 0
+#define READ REQ_OP_READ
#define WRITE RW_MASK
#define READA RWA_MASK
-#define READ_SYNC (READ | REQ_SYNC)
-#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE)
-#define WRITE_ODIRECT (WRITE | REQ_SYNC)
-#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
-#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
-#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
+#define READ_SYNC REQ_SYNC
+#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE)
+#define WRITE_ODIRECT REQ_SYNC
+#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH)
+#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA)
+#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
/*
* Attribute flags. These should be or-ed together to figure out what
@@ -2464,15 +2465,29 @@ extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);
#ifdef CONFIG_BLOCK
+static inline bool op_is_write(unsigned int op)
+{
+ return op == REQ_OP_READ ? false : true;
+}
+
/*
* return READ, READA, or WRITE
*/
-#define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK))
+static inline int bio_rw(struct bio *bio)
+{
+ if (op_is_write(bio_op(bio)))
+ return WRITE;
+
+ return bio->bi_rw & RWA_MASK;
+}
/*
* return data direction, READ or WRITE
*/
-#define bio_data_dir(bio) ((bio)->bi_rw & 1)
+static inline int bio_data_dir(struct bio *bio)
+{
+ return op_is_write(bio_op(bio)) ? WRITE : READ;
+}
extern void check_disk_size_change(struct gendisk *disk,
struct block_device *bdev);
@@ -2747,7 +2762,7 @@ static inline void remove_inode_hash(struct inode *inode)
extern void inode_sb_list_add(struct inode *inode);
#ifdef CONFIG_BLOCK
-extern blk_qc_t submit_bio(int, struct bio *);
+extern blk_qc_t submit_bio(struct bio *);
extern int bdev_read_only(struct block_device *);
#endif
extern int set_blocksize(struct block_device *, int);
@@ -2802,7 +2817,7 @@ extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
#ifdef CONFIG_BLOCK
-typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
+typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
loff_t file_offset);
enum {