From 1faa16d22877f4839bd433547d770c676d1d964c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 6 Apr 2009 14:48:01 +0200 Subject: block: change the request allocation/congestion logic to be sync/async based This makes sure that we never wait on async IO for sync requests, instead of doing the split on writes vs reads. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/blk-sysfs.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'block/blk-sysfs.c') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e29ddfc73cf4..3ff9bba3379a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -48,28 +48,28 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) q->nr_requests = nr; blk_queue_congestion_threshold(q); - if (rl->count[READ] >= queue_congestion_on_threshold(q)) - blk_set_queue_congested(q, READ); - else if (rl->count[READ] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, READ); - - if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) - blk_set_queue_congested(q, WRITE); - else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, WRITE); - - if (rl->count[READ] >= q->nr_requests) { - blk_set_queue_full(q, READ); - } else if (rl->count[READ]+1 <= q->nr_requests) { - blk_clear_queue_full(q, READ); - wake_up(&rl->wait[READ]); + if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_SYNC); + else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_SYNC); + + if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_ASYNC); + else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_ASYNC); + + if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { + blk_set_queue_full(q, BLK_RW_SYNC); + } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) { + blk_clear_queue_full(q, BLK_RW_SYNC); + wake_up(&rl->wait[BLK_RW_SYNC]); } - if (rl->count[WRITE] >= q->nr_requests) { - blk_set_queue_full(q, WRITE); - } else if (rl->count[WRITE]+1 <= q->nr_requests) { - blk_clear_queue_full(q, WRITE); - wake_up(&rl->wait[WRITE]); + if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { + blk_set_queue_full(q, BLK_RW_ASYNC); + } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) { + blk_clear_queue_full(q, BLK_RW_ASYNC); + wake_up(&rl->wait[BLK_RW_ASYNC]); } spin_unlock_irq(q->queue_lock); return ret; -- cgit v1.2.3-59-g8ed1b From 26308eab69aa193f7b3fb50764a64ae14544a39b Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 27 Mar 2009 10:31:51 +0100 Subject: block: fix inconsistency in I/O stat accounting code This forces in_flight to be zero when turning off or on the I/O stat accounting and stops updating I/O stats in attempt_merge() when accounting is turned off. Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- block/blk-core.c | 13 ++++--------- block/blk-merge.c | 29 +++++++++++++++++------------ block/blk-sysfs.c | 4 ++++ block/blk.h | 10 ++++++---- block/elevator.c | 2 +- include/linux/elevator.h | 1 + 6 files changed, 33 insertions(+), 26 deletions(-) (limited to 'block/blk-sysfs.c') diff --git a/block/blk-core.c b/block/blk-core.c index 25572802dac2..3688abff2430 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -64,12 +64,11 @@ static struct workqueue_struct *kblockd_workqueue; static void drive_stat_acct(struct request *rq, int new_io) { - struct gendisk *disk = rq->rq_disk; struct hd_struct *part; int rw = rq_data_dir(rq); int cpu; - if (!blk_fs_request(rq) || !disk || !blk_do_io_stat(disk->queue)) + if (!blk_fs_request(rq) || !blk_do_io_stat(rq)) return; cpu = part_stat_lock(); @@ -1675,9 +1674,7 @@ EXPORT_SYMBOL(blkdev_dequeue_request); static void blk_account_io_completion(struct request *req, unsigned int bytes) { - struct gendisk *disk = req->rq_disk; - - if (!disk || !blk_do_io_stat(disk->queue)) + if (!blk_do_io_stat(req)) return; if (blk_fs_request(req)) { @@ -1694,9 +1691,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { - struct gendisk *disk = req->rq_disk; - - if (!disk || !blk_do_io_stat(disk->queue)) + if (!blk_do_io_stat(req)) return; /* @@ -1711,7 +1706,7 @@ static void blk_account_io_done(struct request *req) int cpu; cpu = part_stat_lock(); - part = disk_map_sector_rcu(disk, req->sector); + part = disk_map_sector_rcu(req->rq_disk, req->sector); part_stat_inc(cpu, part, ios[rw]); part_stat_add(cpu, part, ticks[rw], duration); diff --git a/block/blk-merge.c b/block/blk-merge.c index e39cb24b7679..63760ca3da0f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -338,6 +338,22 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, return 1; } +static void blk_account_io_merge(struct request *req) +{ + if (blk_do_io_stat(req)) { + struct hd_struct *part; + int cpu; + + cpu = part_stat_lock(); + part = disk_map_sector_rcu(req->rq_disk, req->sector); + + part_round_stats(cpu, part); + part_dec_in_flight(part); + + part_stat_unlock(); + } +} + /* * Has to be called with the request spinlock acquired */ @@ -386,18 +402,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); - if (req->rq_disk) { - struct hd_struct *part; - int cpu; - - cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); - - part_round_stats(cpu, part); - part_dec_in_flight(part); - - part_stat_unlock(); - } + blk_account_io_merge(req); req->ioprio = ioprio_best(req->ioprio, next->ioprio); if (blk_rq_cpu_valid(next)) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3ff9bba3379a..73f36beff5cd 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -209,10 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); + elv_quisce_start(q); + if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); + + elv_quisce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/blk.h b/block/blk.h index 22043c2886c7..24fcaeeaf620 100644 --- a/block/blk.h +++ b/block/blk.h @@ -112,12 +112,14 @@ static inline int blk_cpu_to_group(int cpu) #endif } -static inline int blk_do_io_stat(struct request_queue *q) +static inline int blk_do_io_stat(struct request *rq) { - if (q) - return blk_queue_io_stat(q); + struct gendisk *disk = rq->rq_disk; - return 0; + if (!disk || !disk->queue) + return 0; + + return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV); } #endif diff --git a/block/elevator.c b/block/elevator.c index c6744913ff4a..fb81bcc14a8c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -573,7 +573,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); } -static void elv_drain_elevator(struct request_queue *q) +void elv_drain_elevator(struct request_queue *q) { static int printed; while (q->elevator->ops->elevator_dispatch_fn(q, 1)) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 7a204256b155..c59b769f62b0 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -116,6 +116,7 @@ extern void elv_abort_queue(struct request_queue *); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *, struct request *, gfp_t); extern void elv_put_request(struct request_queue *, struct request *); +extern void elv_drain_elevator(struct request_queue *); /* * io scheduler registration -- cgit v1.2.3-59-g8ed1b From f600abe2de81628c40effbb3f8eaf5af0d291e57 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Apr 2009 14:22:01 +0200 Subject: block: fix bad spelling of quiesce Credit goes to Andrew Morton for spotting this one. Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 4 ++-- block/blk.h | 4 ++-- block/elevator.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'block/blk-sysfs.c') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 73f36beff5cd..cac4e9febe6a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -209,14 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); - elv_quisce_start(q); + elv_quiesce_start(q); if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - elv_quisce_end(q); + elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/blk.h b/block/blk.h index 24fcaeeaf620..5dfc41267a08 100644 --- a/block/blk.h +++ b/block/blk.h @@ -70,8 +70,8 @@ void blk_queue_congestion_threshold(struct request_queue *q); int blk_dev_init(void); -void elv_quisce_start(struct request_queue *q); -void elv_quisce_end(struct request_queue *q); +void elv_quiesce_start(struct request_queue *q); +void elv_quiesce_end(struct request_queue *q); /* diff --git a/block/elevator.c b/block/elevator.c index fb81bcc14a8c..7073a9072577 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -590,7 +590,7 @@ void elv_drain_elevator(struct request_queue *q) /* * Call with queue lock held, interrupts disabled */ -void elv_quisce_start(struct request_queue *q) +void elv_quiesce_start(struct request_queue *q) { queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); @@ -607,7 +607,7 @@ void elv_quisce_start(struct request_queue *q) } } -void elv_quisce_end(struct request_queue *q) +void elv_quiesce_end(struct request_queue *q) { queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); } @@ -1126,7 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * Turn on BYPASS and drain all requests w/ elevator private data */ spin_lock_irq(q->queue_lock); - elv_quisce_start(q); + elv_quiesce_start(q); /* * Remember old elevator. @@ -1150,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) */ elevator_exit(old_elevator); spin_lock_irq(q->queue_lock); - elv_quisce_end(q); + elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); -- cgit v1.2.3-59-g8ed1b From 42dad7647aec49b3ad20dd0cb832b232a6ae514f Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 22 Apr 2009 14:01:49 +0200 Subject: block: simplify I/O stat accounting This simplifies I/O stat accounting switching code and separates it completely from I/O scheduler switch code. Requests are accounted according to the state of their request queue at the time of the request allocation. There is no need anymore to flush the request queue when switching I/O accounting state. Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- block/blk-core.c | 6 ++++-- block/blk-merge.c | 5 ++++- block/blk-sysfs.c | 4 ---- block/blk.h | 7 +------ include/linux/blkdev.h | 3 +++ 5 files changed, 12 insertions(+), 13 deletions(-) (limited to 'block/blk-sysfs.c') diff --git a/block/blk-core.c b/block/blk-core.c index 07ab75403e1a..2998fe3a2377 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) blk_rq_init(q, rq); - rq->cmd_flags = rw | REQ_ALLOCED; + rq->cmd_flags = flags | REQ_ALLOCED; if (priv) { if (unlikely(elv_set_request(q, rq, gfp_mask))) { @@ -792,6 +792,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags, if (priv) rl->elvpriv++; + if (blk_queue_io_stat(q)) + rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); diff --git a/block/blk-merge.c b/block/blk-merge.c index 63760ca3da0f..23d2a6fe34a3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); - blk_account_io_merge(req); + /* + * 'next' is going away, so update stats accordingly + */ + blk_account_io_merge(next); req->ioprio = ioprio_best(req->ioprio, next->ioprio); if (blk_rq_cpu_valid(next)) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cac4e9febe6a..3ff9bba3379a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); - elv_quiesce_start(q); - if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - - elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/blk.h b/block/blk.h index 5dfc41267a08..79c85f7c9ff5 100644 --- a/block/blk.h +++ b/block/blk.h @@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu) static inline int blk_do_io_stat(struct request *rq) { - struct gendisk *disk = rq->rq_disk; - - if (!disk || !disk->queue) - return 0; - - return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV); + return rq->rq_disk && blk_rq_io_stat(rq); } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba54c834a590..2755d5c6da22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ enum rq_flag_bits { __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NOIDLE, /* Don't anticipate more IO after this one */ + __REQ_IO_STAT, /* account I/O stat */ __REQ_NR_BITS, /* stops here */ }; @@ -145,6 +146,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_NOIDLE (1 << __REQ_NOIDLE) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) #define BLK_MAX_CDB 16 @@ -598,6 +600,7 @@ enum { blk_failfast_transport(rq) || \ blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) +#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.2.3-59-g8ed1b