From 531ed6261e7466907418b1a9971a5c71d7d250e4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Jun 2014 11:21:08 -0400 Subject: blk-mq: fix a memory ordering bug in blk_mq_queue_enter() blk-mq uses a percpu_counter to keep track of how many usages are in flight. The percpu_counter is drained while freezing to ensure that no usage is left in-flight after freezing is complete. blk_mq_queue_enter/exit() and blk_mq_[un]freeze_queue() implement this per-cpu gating mechanism; unfortunately, it contains a subtle bug - smp_wmb() in blk_mq_queue_enter() doesn't prevent prevent the cpu from fetching @q->bypass_depth before incrementing @q->mq_usage_counter and if freezing happens inbetween the caller can slip through and freezing can be complete while there are active users. Use smp_mb() instead so that bypass_depth and mq_usage_counter modifications and tests are properly interlocked. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. Bellinger Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index ad69ef657e85..9541f5111ba6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -81,7 +81,7 @@ static int blk_mq_queue_enter(struct request_queue *q) int ret; __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - smp_wmb(); + smp_mb(); /* we have problems freezing the queue if it's initializing */ if (!blk_queue_dying(q) && -- cgit v1.2.3-59-g8ed1b From 776687bce42bb22cce48b5da950e48ebbb9a948f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 10:29:17 -0600 Subject: block, blk-mq: draining can't be skipped even if bypass_depth was non-zero Currently, both blk_queue_bypass_start() and blk_mq_freeze_queue() skip queue draining if bypass_depth was already above zero. The assumption is that the one which bumped the bypass_depth should have performed draining already; however, there's nothing which prevents a new instance of bypassing/freezing from starting before the previous one finishes draining. The current code may allow the later bypassing/freezing instances to complete while there still are in-flight requests which haven't finished draining. Fix it by draining regardless of bypass_depth. We still skip draining from blk_queue_bypass_start() while the queue is initializing to avoid introducing excessive delays during boot. INIT_DONE setting is moved above the initial blk_queue_bypass_end() so that bypassing attempts can't slip inbetween. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. Bellinger Signed-off-by: Jens Axboe --- block/blk-core.c | 11 +++++++---- block/blk-mq.c | 7 ++----- block/blk-sysfs.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 6f8dba161bfe..0d0bdd65b2d7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -438,14 +438,17 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) */ void blk_queue_bypass_start(struct request_queue *q) { - bool drain; - spin_lock_irq(q->queue_lock); - drain = !q->bypass_depth++; + q->bypass_depth++; queue_flag_set(QUEUE_FLAG_BYPASS, q); spin_unlock_irq(q->queue_lock); - if (drain) { + /* + * Queues start drained. Skip actual draining till init is + * complete. This avoids lenghty delays during queue init which + * can happen many times during boot. + */ + if (blk_queue_init_done(q)) { spin_lock_irq(q->queue_lock); __blk_drain_queue(q, false); spin_unlock_irq(q->queue_lock); diff --git a/block/blk-mq.c b/block/blk-mq.c index 9541f5111ba6..f4bdddd7ed99 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -131,15 +131,12 @@ void blk_mq_drain_queue(struct request_queue *q) */ static void blk_mq_freeze_queue(struct request_queue *q) { - bool drain; - spin_lock_irq(q->queue_lock); - drain = !q->bypass_depth++; + q->bypass_depth++; queue_flag_set(QUEUE_FLAG_BYPASS, q); spin_unlock_irq(q->queue_lock); - if (drain) - blk_mq_drain_queue(q); + blk_mq_drain_queue(q); } static void blk_mq_unfreeze_queue(struct request_queue *q) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 23321fbab293..4db5abf96b9e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -554,8 +554,8 @@ int blk_register_queue(struct gendisk *disk) * Initialization must be complete by now. Finish the initial * bypass from queue allocation. */ - blk_queue_bypass_end(q); queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); + blk_queue_bypass_end(q); ret = blk_trace_init_sysfs(dev); if (ret) -- cgit v1.2.3-59-g8ed1b From 780db2071ac4d167ee4154ad9c96088f1bba044b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 10:31:13 -0600 Subject: blk-mq: decouble blk-mq freezing from generic bypassing blk_mq freezing is entangled with generic bypassing which bypasses blkcg and io scheduler and lets IO requests fall through the block layer to the drivers in FIFO order. This allows forward progress on IOs with the advanced features disabled so that those features can be configured or altered without worrying about stalling IO which may lead to deadlock through memory allocation. However, generic bypassing doesn't quite fit blk-mq. blk-mq currently doesn't make use of blkcg or ioscheds and it maps bypssing to freezing, which blocks request processing and drains all the in-flight ones. This causes problems as bypassing assumes that request processing is online. blk-mq works around this by conditionally allowing request processing for the problem case - during queue initialization. Another weirdity is that except for during queue cleanup, bypassing started on the generic side prevents blk-mq from processing new requests but doesn't drain the in-flight ones. This shouldn't break anything but again highlights that something isn't quite right here. The root cause is conflating blk-mq freezing and generic bypassing which are two different mechanisms. The only intersecting purpose that they serve is during queue cleanup. Let's properly separate blk-mq freezing from generic bypassing and simply use it where necessary. * request_queue->mq_freeze_depth is added and blk_mq_[un]freeze_queue() now operate on this counter instead of ->bypass_depth. The replacement for QUEUE_FLAG_BYPASS isn't added but the counter is tested directly. This will be further updated by later changes. * blk_mq_drain_queue() is dropped and "__" prefix is dropped from blk_mq_freeze_queue(). Queue cleanup path now calls blk_mq_freeze_queue() directly. * blk_queue_enter()'s fast path condition is simplified to simply check @q->mq_freeze_depth. Previously, the condition was !blk_queue_dying(q) && (!blk_queue_bypass(q) || !blk_queue_init_done(q)) mq_freeze_depth is incremented right after dying is set and blk_queue_init_done() exception isn't necessary as blk-mq doesn't start frozen, which only leaves the blk_queue_bypass() test which can be replaced by @q->mq_freeze_depth test. This change simplifies the code and reduces confusion in the area. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. Bellinger Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq.c | 17 ++++++----------- block/blk-mq.h | 2 +- include/linux/blkdev.h | 1 + 4 files changed, 9 insertions(+), 13 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 0d0bdd65b2d7..c359d72e9d76 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -514,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. */ if (q->mq_ops) { - blk_mq_drain_queue(q); + blk_mq_freeze_queue(q); spin_lock_irq(lock); } else { spin_lock_irq(lock); diff --git a/block/blk-mq.c b/block/blk-mq.c index f4bdddd7ed99..1e324a123d40 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -84,15 +84,14 @@ static int blk_mq_queue_enter(struct request_queue *q) smp_mb(); /* we have problems freezing the queue if it's initializing */ - if (!blk_queue_dying(q) && - (!blk_queue_bypass(q) || !blk_queue_init_done(q))) + if (!q->mq_freeze_depth) return 0; __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); spin_lock_irq(q->queue_lock); ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !blk_queue_bypass(q) || blk_queue_dying(q), + !q->mq_freeze_depth || blk_queue_dying(q), *q->queue_lock); /* inc usage with lock hold to avoid freeze_queue runs here */ if (!ret && !blk_queue_dying(q)) @@ -129,11 +128,10 @@ void blk_mq_drain_queue(struct request_queue *q) * Guarantee no request is in use, so we can change any data structure of * the queue afterward. */ -static void blk_mq_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue(struct request_queue *q) { spin_lock_irq(q->queue_lock); - q->bypass_depth++; - queue_flag_set(QUEUE_FLAG_BYPASS, q); + q->mq_freeze_depth++; spin_unlock_irq(q->queue_lock); blk_mq_drain_queue(q); @@ -144,11 +142,8 @@ static void blk_mq_unfreeze_queue(struct request_queue *q) bool wake = false; spin_lock_irq(q->queue_lock); - if (!--q->bypass_depth) { - queue_flag_clear(QUEUE_FLAG_BYPASS, q); - wake = true; - } - WARN_ON_ONCE(q->bypass_depth < 0); + wake = !--q->mq_freeze_depth; + WARN_ON_ONCE(q->mq_freeze_depth < 0); spin_unlock_irq(q->queue_lock); if (wake) wake_up_all(&q->mq_freeze_wq); diff --git a/block/blk-mq.h b/block/blk-mq.h index 26460884c6cd..ca4964a6295d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,7 +28,7 @@ struct blk_mq_ctx { void __blk_mq_complete_request(struct request *rq); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); -void blk_mq_drain_queue(struct request_queue *q); +void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); void blk_mq_clone_flush_request(struct request *flush_rq, struct request *orig_rq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8699bcf5f099..c8f344ff74fe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -470,6 +470,7 @@ struct request_queue { struct mutex sysfs_lock; int bypass_depth; + int mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; -- cgit v1.2.3-59-g8ed1b From 72d6f02a8d4e0dda74de3a541b1c4ae82f5f7b45 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 10:33:02 -0600 Subject: blk-mq: collapse __blk_mq_drain_queue() into blk_mq_freeze_queue() Keeping __blk_mq_drain_queue() as a separate function doesn't buy us anything and it's gonna be further simplified. Let's flatten it into its caller. This patch doesn't make any functional change. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. Bellinger Signed-off-by: Jens Axboe --- block/blk-mq.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 1e324a123d40..22682fb4be65 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -108,8 +108,16 @@ static void blk_mq_queue_exit(struct request_queue *q) __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); } -void blk_mq_drain_queue(struct request_queue *q) +/* + * Guarantee no request is in use, so we can change any data structure of + * the queue afterward. + */ +void blk_mq_freeze_queue(struct request_queue *q) { + spin_lock_irq(q->queue_lock); + q->mq_freeze_depth++; + spin_unlock_irq(q->queue_lock); + while (true) { s64 count; @@ -124,19 +132,6 @@ void blk_mq_drain_queue(struct request_queue *q) } } -/* - * Guarantee no request is in use, so we can change any data structure of - * the queue afterward. - */ -void blk_mq_freeze_queue(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - q->mq_freeze_depth++; - spin_unlock_irq(q->queue_lock); - - blk_mq_drain_queue(q); -} - static void blk_mq_unfreeze_queue(struct request_queue *q) { bool wake = false; -- cgit v1.2.3-59-g8ed1b From add703fda981b9719d37f371498b9f129acbd997 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 10:34:38 -0600 Subject: blk-mq: use percpu_ref for mq usage count Currently, blk-mq uses a percpu_counter to keep track of how many usages are in flight. The percpu_counter is drained while freezing to ensure that no usage is left in-flight after freezing is complete. blk_mq_queue_enter/exit() and blk_mq_[un]freeze_queue() implement this per-cpu gating mechanism. This type of code has relatively high chance of subtle bugs which are extremely difficult to trigger and it's way too hairy to be open coded in blk-mq. percpu_ref can serve the same purpose after the recent changes. This patch replaces the open-coded per-cpu usage counting and draining mechanism with percpu_ref. blk_mq_queue_enter() performs tryget_live on the ref and exit() performs put. blk_mq_freeze_queue() kills the ref and waits until the reference count reaches zero. blk_mq_unfreeze_queue() revives the ref and wakes up the waiters. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. Bellinger Cc: Kent Overstreet Signed-off-by: Jens Axboe --- block/blk-mq.c | 68 +++++++++++++++++++++----------------------------- include/linux/blkdev.h | 3 ++- 2 files changed, 31 insertions(+), 40 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 22682fb4be65..5189cb1e478a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -78,34 +78,32 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, static int blk_mq_queue_enter(struct request_queue *q) { - int ret; - - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - smp_mb(); - - /* we have problems freezing the queue if it's initializing */ - if (!q->mq_freeze_depth) - return 0; - - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + while (true) { + int ret; - spin_lock_irq(q->queue_lock); - ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !q->mq_freeze_depth || blk_queue_dying(q), - *q->queue_lock); - /* inc usage with lock hold to avoid freeze_queue runs here */ - if (!ret && !blk_queue_dying(q)) - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - else if (blk_queue_dying(q)) - ret = -ENODEV; - spin_unlock_irq(q->queue_lock); + if (percpu_ref_tryget_live(&q->mq_usage_counter)) + return 0; - return ret; + ret = wait_event_interruptible(q->mq_freeze_wq, + !q->mq_freeze_depth || blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; + if (ret) + return ret; + } } static void blk_mq_queue_exit(struct request_queue *q) { - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + percpu_ref_put(&q->mq_usage_counter); +} + +static void blk_mq_usage_counter_release(struct percpu_ref *ref) +{ + struct request_queue *q = + container_of(ref, struct request_queue, mq_usage_counter); + + wake_up_all(&q->mq_freeze_wq); } /* @@ -118,18 +116,9 @@ void blk_mq_freeze_queue(struct request_queue *q) q->mq_freeze_depth++; spin_unlock_irq(q->queue_lock); - while (true) { - s64 count; - - spin_lock_irq(q->queue_lock); - count = percpu_counter_sum(&q->mq_usage_counter); - spin_unlock_irq(q->queue_lock); - - if (count == 0) - break; - blk_mq_start_hw_queues(q); - msleep(10); - } + percpu_ref_kill(&q->mq_usage_counter); + blk_mq_run_queues(q, false); + wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); } static void blk_mq_unfreeze_queue(struct request_queue *q) @@ -140,8 +129,10 @@ static void blk_mq_unfreeze_queue(struct request_queue *q) wake = !--q->mq_freeze_depth; WARN_ON_ONCE(q->mq_freeze_depth < 0); spin_unlock_irq(q->queue_lock); - if (wake) + if (wake) { + percpu_ref_reinit(&q->mq_usage_counter); wake_up_all(&q->mq_freeze_wq); + } } bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) @@ -1785,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_counter_init(&q->mq_usage_counter, 0)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); @@ -1878,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); blk_mq_free_hw_queues(q, set); - percpu_counter_destroy(&q->mq_usage_counter); + percpu_ref_exit(&q->mq_usage_counter); free_percpu(q->queue_ctx); kfree(q->queue_hw_ctx); @@ -2037,8 +2028,7 @@ static int __init blk_mq_init(void) { blk_mq_cpu_init(); - /* Must be called after percpu_counter_hotcpu_callback() */ - hotcpu_notifier(blk_mq_queue_reinit_notify, -10); + hotcpu_notifier(blk_mq_queue_reinit_notify, 0); return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8f344ff74fe..518b46555b80 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -484,7 +485,7 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; - struct percpu_counter mq_usage_counter; + struct percpu_ref mq_usage_counter; struct list_head all_q_node; struct blk_mq_tag_set *tag_set; -- cgit v1.2.3-59-g8ed1b From cbcd1054a1fd2aa980fc11ff28e436fc4aaa2d54 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 1 Jul 2014 10:36:47 -0600 Subject: bio-integrity: add "bip_max_vcnt" into struct bio_integrity_payload Commit 08778795 ("block: Fix nr_vecs for inline integrity vectors") from Martin introduces the function bip_integrity_vecs(get the useful vectors) to fix the issue about nr_vecs for inline integrity vectors that reported by David Milburn. But it seems that bip_integrity_vecs() will return the wrong number if the bio is not based on any bio_set for some reason(bio->bi_pool == NULL), because in that case, the bip_inline_vecs[0] is malloced directly. So here we add the bip_max_vcnt to record the count of vector slots, and cleanup the function bip_integrity_vecs(). Signed-off-by: Gu Zheng Cc: Martin K. Petersen Cc: Kent Overstreet Signed-off-by: Jens Axboe --- block/bio-integrity.c | 12 +++--------- include/linux/bio.h | 1 + 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 9e241063a616..bc423f7b02da 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; + bip->bip_max_vcnt = bvec_nr_vecs(idx); } else { bip->bip_vec = bip->bip_inline_vecs; + bip->bip_max_vcnt = inline_vecs; } bip->bip_slab = idx; @@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio) } EXPORT_SYMBOL(bio_integrity_free); -static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) -{ - if (bip->bip_slab == BIO_POOL_NONE) - return BIP_INLINE_VECS; - - return bvec_nr_vecs(bip->bip_slab); -} - /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update @@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { + if (bip->bip_vcnt >= bip->bip_max_vcnt) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } diff --git a/include/linux/bio.h b/include/linux/bio.h index d2633ee099d9..b39e5000ff58 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -308,6 +308,7 @@ struct bio_integrity_payload { unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ + unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned bip_owns_buf:1; /* should free bip_buf */ struct work_struct bip_work; /* I/O completion */ -- cgit v1.2.3-59-g8ed1b From 472d5e2af28cc6ae9749ce22f951ea426fdfc392 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 12 Jun 2014 19:45:17 +0200 Subject: block/partitions/aix.c: replace count*size kzalloc by kcalloc kcalloc manages count*sizeof overflow. Cc: Jens Axboe Cc: Andrew Morton Signed-off-by: Fabian Frederick Signed-off-by: Jens Axboe --- block/partitions/aix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/partitions/aix.c b/block/partitions/aix.c index 43be471d9b1d..0a6ed546331d 100644 --- a/block/partitions/aix.c +++ b/block/partitions/aix.c @@ -215,7 +215,7 @@ int aix_partition(struct parsed_partitions *state) numlvs = be16_to_cpu(p->numlvs); put_dev_sector(sect); } - lvip = kzalloc(sizeof(struct lv_info) * state->limit, GFP_KERNEL); + lvip = kcalloc(state->limit, sizeof(struct lv_info), GFP_KERNEL); if (!lvip) return 0; if (numlvs && (d = read_part_sector(state, vgda_sector + 1, §))) { -- cgit v1.2.3-59-g8ed1b From 600ffc5ead7fd0bc5b9950842b2aece26682b0e0 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 12 Jun 2014 20:04:52 +0200 Subject: block/partitions/amiga.c: replace nolevel printk by pr_err Also add no prefix pr_fmt to avoid any future default format update Cc: Jens Axboe Cc: Andrew Morton Signed-off-by: Fabian Frederick Signed-off-by: Jens Axboe --- block/partitions/amiga.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c index 70cbf44a1560..2b13533d60a2 100644 --- a/block/partitions/amiga.c +++ b/block/partitions/amiga.c @@ -7,6 +7,8 @@ * Re-organised Feb 1998 Russell King */ +#define pr_fmt(fmt) fmt + #include #include @@ -40,7 +42,7 @@ int amiga_partition(struct parsed_partitions *state) data = read_part_sector(state, blk, §); if (!data) { if (warn_no_part) - printk("Dev %s: unable to read RDB block %d\n", + pr_err("Dev %s: unable to read RDB block %d\n", bdevname(state->bdev, b), blk); res = -1; goto rdb_done; @@ -57,12 +59,12 @@ int amiga_partition(struct parsed_partitions *state) *(__be32 *)(data+0xdc) = 0; if (checksum_block((__be32 *)data, be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) { - printk("Warning: Trashed word at 0xd0 in block %d " - "ignored in checksum calculation\n",blk); + pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n", + blk); break; } - printk("Dev %s: RDB in block %d has bad checksum\n", + pr_err("Dev %s: RDB in block %d has bad checksum\n", bdevname(state->bdev, b), blk); } @@ -83,7 +85,7 @@ int amiga_partition(struct parsed_partitions *state) data = read_part_sector(state, blk, §); if (!data) { if (warn_no_part) - printk("Dev %s: unable to read partition block %d\n", + pr_err("Dev %s: unable to read partition block %d\n", bdevname(state->bdev, b), blk); res = -1; goto rdb_done; -- cgit v1.2.3-59-g8ed1b From dce14c239ad550816d07f7441fc10a2d5739fd29 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 12 Jun 2014 20:16:57 +0200 Subject: block/partitions/msdos.c: code clean-up checkpatch fixing: WARNING: Missing a blank line after declarations WARNING: space prohibited between function name and open parenthesis '(' ERROR: spaces required around that '<' (ctx:VxV) Cc: Jens Axboe Cc: Andrew Morton Signed-off-by: Fabian Frederick Signed-off-by: Jens Axboe --- block/partitions/msdos.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 9123f250b425..93e7c1b32edd 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -159,8 +159,9 @@ static void parse_extended(struct parsed_partitions *state, /* * First process the data partition(s) */ - for (i=0; i<4; i++, p++) { + for (i = 0; i < 4; i++, p++) { sector_t offs, size, next; + if (!nr_sects(p) || is_extended_partition(p)) continue; @@ -194,7 +195,7 @@ static void parse_extended(struct parsed_partitions *state, * It should be a link to the next logical partition. */ p -= 4; - for (i=0; i<4; i++, p++) + for (i = 0; i < 4; i++, p++) if (nr_sects(p) && is_extended_partition(p)) break; if (i == 4) @@ -243,8 +244,8 @@ static void parse_solaris_x86(struct parsed_partitions *state, return; } /* Ensure we can handle previous case of VTOC with 8 entries gracefully */ - max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; - for (i=0; inextlimit; i++) { + max_nparts = le16_to_cpu(v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; + for (i = 0; i < max_nparts && state->next < state->limit; i++) { struct solaris_x86_slice *s = &v->v_slice[i]; char tmp[3 + 10 + 1 + 1]; @@ -409,7 +410,7 @@ static void parse_minix(struct parsed_partitions *state, /* The first sector of a Minix partition can have either * a secondary MBR describing its subpartitions, or * the normal boot sector. */ - if (msdos_magic_present (data + 510) && + if (msdos_magic_present(data + 510) && SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; @@ -527,6 +528,7 @@ int msdos_partition(struct parsed_partitions *state) for (slot = 1 ; slot <= 4 ; slot++, p++) { sector_t start = start_sect(p)*sector_size; sector_t size = nr_sects(p)*sector_size; + if (!size) continue; if (is_extended_partition(p)) { @@ -537,6 +539,7 @@ int msdos_partition(struct parsed_partitions *state) * sector, although it may not be enough/proper. */ sector_t n = 2; + n = min(size, max(sector_size, n)); put_partition(state, slot, start, n); -- cgit v1.2.3-59-g8ed1b From 16e1556526241b893d40b01d1c1b14a4e83ee499 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 12 Jun 2014 20:26:01 +0200 Subject: block/partitions/efi.c: kerneldoc fixing Adding function documentation and fixing kerneldoc warnings ('field: description' uniformization). Cc: Davidlohr Bueso Cc: Jens Axboe Signed-off-by: Fabian Frederick Signed-off-by: Jens Axboe --- block/partitions/efi.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'block') diff --git a/block/partitions/efi.c b/block/partitions/efi.c index dc51f467a560..56d08fd75b1a 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -121,7 +121,7 @@ __setup("gpt", force_gpt_fn); /** * efi_crc32() - EFI version of crc32 function * @buf: buffer to calculate crc32 of - * @len - length of buf + * @len: length of buf * * Description: Returns EFI-style CRC32 value for @buf * @@ -240,10 +240,10 @@ done: /** * read_lba(): Read bytes from disk, starting at given LBA - * @state - * @lba - * @buffer - * @size_t + * @state: disk parsed partitions + * @lba: the Logical Block Address of the partition table + * @buffer: destination buffer + * @count: bytes to read * * Description: Reads @count bytes from @state->bdev into @buffer. * Returns number of bytes read on success, 0 on error. @@ -277,8 +277,8 @@ static size_t read_lba(struct parsed_partitions *state, /** * alloc_read_gpt_entries(): reads partition entries from disk - * @state - * @gpt - GPT header + * @state: disk parsed partitions + * @gpt: GPT header * * Description: Returns ptes on success, NULL on error. * Allocates space for PTEs based on information found in @gpt. @@ -312,8 +312,8 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, /** * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk - * @state - * @lba is the Logical Block Address of the partition table + * @state: disk parsed partitions + * @lba: the Logical Block Address of the partition table * * Description: returns GPT header on success, NULL on error. Allocates * and fills a GPT header starting at @ from @state->bdev. @@ -340,10 +340,10 @@ static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state, /** * is_gpt_valid() - tests one GPT header and PTEs for validity - * @state - * @lba is the logical block address of the GPT header to test - * @gpt is a GPT header ptr, filled on return. - * @ptes is a PTEs ptr, filled on return. + * @state: disk parsed partitions + * @lba: logical block address of the GPT header to test + * @gpt: GPT header ptr, filled on return. + * @ptes: PTEs ptr, filled on return. * * Description: returns 1 if valid, 0 on error. * If valid, returns pointers to newly allocated GPT header and PTEs. @@ -461,8 +461,8 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, /** * is_pte_valid() - tests one PTE for validity - * @pte is the pte to check - * @lastlba is last lba of the disk + * @pte:pte to check + * @lastlba: last lba of the disk * * Description: returns 1 if valid, 0 on error. */ @@ -478,9 +478,10 @@ is_pte_valid(const gpt_entry *pte, const u64 lastlba) /** * compare_gpts() - Search disk for valid GPT headers and PTEs - * @pgpt is the primary GPT header - * @agpt is the alternate GPT header - * @lastlba is the last LBA number + * @pgpt: primary GPT header + * @agpt: alternate GPT header + * @lastlba: last LBA number + * * Description: Returns nothing. Sanity checks pgpt and agpt fields * and prints warnings on discrepancies. * @@ -572,9 +573,10 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) /** * find_valid_gpt() - Search disk for valid GPT headers and PTEs - * @state - * @gpt is a GPT header ptr, filled on return. - * @ptes is a PTEs ptr, filled on return. + * @state: disk parsed partitions + * @gpt: GPT header ptr, filled on return. + * @ptes: PTEs ptr, filled on return. + * * Description: Returns 1 if valid, 0 on error. * If valid, returns pointers to newly allocated GPT header and PTEs. * Validity depends on PMBR being valid (or being overridden by the @@ -663,7 +665,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, /** * efi_partition(struct parsed_partitions *state) - * @state + * @state: disk parsed partitions * * Description: called from check.c, if the disk contains GPT * partitions, sets up partition entries in the kernel. -- cgit v1.2.3-59-g8ed1b From 63f264965947ac6299452711f614f086955b2515 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 25 May 2014 21:43:33 +0900 Subject: block: fix BLKSECTGET ioctl when max_sectors is greater than USHRT_MAX BLKSECTGET ioctl loads the request queue's max_sectors as unsigned short value to the argument pointer. So if the max_sector is greater than USHRT_MAX, the upper 16 bits of that is just discarded. In such case, USHRT_MAX is more preferable than the lower 16 bits of max_sectors. Signed-off-by: Akinobu Mita Cc: Jens Axboe Cc: "James E.J. Bottomley" Cc: Douglas Gilbert Cc: linux-scsi@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/compat_ioctl.c | 6 ++++-- block/ioctl.c | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index fbd5a67cb773..e0393cd2ea7f 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -663,6 +663,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) fmode_t mode = file->f_mode; struct backing_dev_info *bdi; loff_t size; + unsigned int max_sectors; /* * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have @@ -718,8 +719,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKSSZGET: /* get block device hardware sector size */ return compat_put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: - return compat_put_ushort(arg, - queue_max_sectors(bdev_get_queue(bdev))); + max_sectors = min_t(unsigned int, USHRT_MAX, + queue_max_sectors(bdev_get_queue(bdev))); + return compat_put_ushort(arg, max_sectors); case BLKROTATIONAL: return compat_put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); diff --git a/block/ioctl.c b/block/ioctl.c index 7d5c3b20af45..d6cda8147c91 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -278,6 +278,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, struct backing_dev_info *bdi; loff_t size; int ret, n; + unsigned int max_sectors; switch(cmd) { case BLKFLSBUF: @@ -375,7 +376,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKDISCARDZEROES: return put_uint(arg, bdev_discard_zeroes_data(bdev)); case BLKSECTGET: - return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); + max_sectors = min_t(unsigned int, USHRT_MAX, + queue_max_sectors(bdev_get_queue(bdev))); + return put_ushort(arg, max_sectors); case BLKROTATIONAL: return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); case BLKRASET: -- cgit v1.2.3-59-g8ed1b From 9b4231bf995996d6459c57959ead5a1829ff2c57 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 25 May 2014 21:43:34 +0900 Subject: block: fix SG_[GS]ET_RESERVED_SIZE ioctl when max_sectors is huge SG_GET_RESERVED_SIZE and SG_SET_RESERVED_SIZE ioctls access a reserved buffer in bytes as int type. The value needs to be capped at the request queue's max_sectors. But integer overflow is not correctly handled in the calculation when converting max_sectors from sectors to bytes. Signed-off-by: Akinobu Mita Cc: Jens Axboe Cc: "James E.J. Bottomley" Cc: Douglas Gilbert Cc: linux-scsi@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 14695c6221c8..bda1497add4c 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -82,9 +82,18 @@ static int sg_set_timeout(struct request_queue *q, int __user *p) return err; } +static int max_sectors_bytes(struct request_queue *q) +{ + unsigned int max_sectors = queue_max_sectors(q); + + max_sectors = min_t(unsigned int, max_sectors, INT_MAX >> 9); + + return max_sectors << 9; +} + static int sg_get_reserved_size(struct request_queue *q, int __user *p) { - unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9); + int val = min_t(int, q->sg_reserved_size, max_sectors_bytes(q)); return put_user(val, p); } @@ -98,10 +107,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p) if (size < 0) return -EINVAL; - if (size > (queue_max_sectors(q) << 9)) - size = queue_max_sectors(q) << 9; - q->sg_reserved_size = size; + q->sg_reserved_size = min(size, max_sectors_bytes(q)); return 0; } -- cgit v1.2.3-59-g8ed1b From 254c4407cb84a6dec90336054615b0f0e996bb7c Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 1 Jul 2014 10:55:15 -0600 Subject: bio: modify __bio_add_page() to accept pages that don't start a new segment The original behaviour is to refuse to add a new page if the maximum number of segments has been reached, regardless of the fact the page we are going to add can be merged into the last segment or not. Unfortunately, when the system runs under heavy memory fragmentation conditions, a driver may try to add multiple pages to the last segment. The original code won't accept them and EBUSY will be reported to userspace. This patch modifies the function so it refuses to add a page only in case the latter starts a new segment and the maximum number of segments has already been reached. The bug can be easily reproduced with the st driver: 1) set CONFIG_SCSI_MPT2SAS_MAX_SGE or CONFIG_SCSI_MPT3SAS_MAX_SGE to 16 2) modprobe st buffer_kbs=1024 3) #dd if=/dev/zero of=/dev/st0 bs=1M count=10 dd: error writing `/dev/st0': Device or resource busy [ming.lei@canonical.com: update bi_iter.bi_size before recounting segments] Signed-off-by: Maurizio Lombardi Signed-off-by: Ming Lei Tested-by: Dongsu Park Tested-by: Jet Chen Cc: Al Viro Cc: Christoph Hellwig Cc: Kent Overstreet Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/bio.c | 52 +++++++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 23 deletions(-) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index 0ec61c9e536c..fb12df9af0fc 100644 --- a/block/bio.c +++ b/block/bio.c @@ -744,6 +744,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page } } + bio->bi_iter.bi_size += len; goto done; } @@ -760,28 +761,31 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page return 0; /* - * we might lose a segment or two here, but rather that than - * make this too complex. + * setup the new entry, we might clear it again later if we + * cannot add the page + */ + bvec = &bio->bi_io_vec[bio->bi_vcnt]; + bvec->bv_page = page; + bvec->bv_len = len; + bvec->bv_offset = offset; + bio->bi_vcnt++; + bio->bi_phys_segments++; + bio->bi_iter.bi_size += len; + + /* + * Perform a recount if the number of segments is greater + * than queue_max_segments(q). */ - while (bio->bi_phys_segments >= queue_max_segments(q)) { + while (bio->bi_phys_segments > queue_max_segments(q)) { if (retried_segments) - return 0; + goto failed; retried_segments = 1; blk_recount_segments(q, bio); } - /* - * setup the new entry, we might clear it again later if we - * cannot add the page - */ - bvec = &bio->bi_io_vec[bio->bi_vcnt]; - bvec->bv_page = page; - bvec->bv_len = len; - bvec->bv_offset = offset; - /* * if queue has other restrictions (eg varying max sector size * depending on offset), it can specify a merge_bvec_fn in the @@ -799,23 +803,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * merge_bvec_fn() returns number of bytes it can accept * at this offset */ - if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) { - bvec->bv_page = NULL; - bvec->bv_len = 0; - bvec->bv_offset = 0; - return 0; - } + if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) + goto failed; } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) bio->bi_flags &= ~(1 << BIO_SEG_VALID); - bio->bi_vcnt++; - bio->bi_phys_segments++; done: - bio->bi_iter.bi_size += len; return len; + + failed: + bvec->bv_page = NULL; + bvec->bv_len = 0; + bvec->bv_offset = 0; + bio->bi_vcnt--; + bio->bi_iter.bi_size -= len; + blk_recount_segments(q, bio); + return 0; } /** -- cgit v1.2.3-59-g8ed1b From 26a337944e73d88838642a09689fdf40daf00069 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 14 Jul 2014 22:04:47 +0200 Subject: Revert "bio: modify __bio_add_page() to accept pages that don't start a new segment" This reverts commit 254c4407cb84a6dec90336054615b0f0e996bb7c. It causes crashes with cryptsetup, even after a few iterations and updates. Drop it for now. --- block/bio.c | 52 +++++++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index fb12df9af0fc..0ec61c9e536c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -744,7 +744,6 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page } } - bio->bi_iter.bi_size += len; goto done; } @@ -761,31 +760,28 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page return 0; /* - * setup the new entry, we might clear it again later if we - * cannot add the page - */ - bvec = &bio->bi_io_vec[bio->bi_vcnt]; - bvec->bv_page = page; - bvec->bv_len = len; - bvec->bv_offset = offset; - bio->bi_vcnt++; - bio->bi_phys_segments++; - bio->bi_iter.bi_size += len; - - /* - * Perform a recount if the number of segments is greater - * than queue_max_segments(q). + * we might lose a segment or two here, but rather that than + * make this too complex. */ - while (bio->bi_phys_segments > queue_max_segments(q)) { + while (bio->bi_phys_segments >= queue_max_segments(q)) { if (retried_segments) - goto failed; + return 0; retried_segments = 1; blk_recount_segments(q, bio); } + /* + * setup the new entry, we might clear it again later if we + * cannot add the page + */ + bvec = &bio->bi_io_vec[bio->bi_vcnt]; + bvec->bv_page = page; + bvec->bv_len = len; + bvec->bv_offset = offset; + /* * if queue has other restrictions (eg varying max sector size * depending on offset), it can specify a merge_bvec_fn in the @@ -803,25 +799,23 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * merge_bvec_fn() returns number of bytes it can accept * at this offset */ - if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) - goto failed; + if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) { + bvec->bv_page = NULL; + bvec->bv_len = 0; + bvec->bv_offset = 0; + return 0; + } } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) bio->bi_flags &= ~(1 << BIO_SEG_VALID); + bio->bi_vcnt++; + bio->bi_phys_segments++; done: + bio->bi_iter.bi_size += len; return len; - - failed: - bvec->bv_page = NULL; - bvec->bv_len = 0; - bvec->bv_offset = 0; - bio->bi_vcnt--; - bio->bi_iter.bi_size -= len; - blk_recount_segments(q, bio); - return 0; } /** -- cgit v1.2.3-59-g8ed1b From 2a1b4cf2331d92bc009bf94fa02a24604cdaf24c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 5 Jul 2014 18:43:21 -0400 Subject: blkcg: don't call into policy draining if root_blkg is already gone While a queue is being destroyed, all the blkgs are destroyed and its ->root_blkg pointer is set to NULL. If someone else starts to drain while the queue is in this state, the following oops happens. NULL pointer dereference at 0000000000000028 IP: [] blk_throtl_drain+0x84/0x230 PGD e4a1067 PUD b773067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: cfq_iosched(-) [last unloaded: cfq_iosched] CPU: 1 PID: 537 Comm: bash Not tainted 3.16.0-rc3-work+ #2 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff88000e222250 ti: ffff88000efd4000 task.ti: ffff88000efd4000 RIP: 0010:[] [] blk_throtl_drain+0x84/0x230 RSP: 0018:ffff88000efd7bf0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff880015091450 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88000efd7c10 R08: 0000000000000000 R09: 0000000000000001 R10: ffff88000e222250 R11: 0000000000000000 R12: ffff880015091450 R13: ffff880015092e00 R14: ffff880015091d70 R15: ffff88001508fc28 FS: 00007f1332650740(0000) GS:ffff88001fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000028 CR3: 0000000009446000 CR4: 00000000000006e0 Stack: ffffffff8144e8f6 ffff880015091450 0000000000000000 ffff880015091d80 ffff88000efd7c28 ffffffff8144ae2f ffff880015091450 ffff88000efd7c58 ffffffff81427641 ffff880015091450 ffffffff82401f00 ffff880015091450 Call Trace: [] blkcg_drain_queue+0x1f/0x60 [] __blk_drain_queue+0x71/0x180 [] blk_queue_bypass_start+0x6e/0xb0 [] blkcg_deactivate_policy+0x38/0x120 [] blk_throtl_exit+0x34/0x50 [] blkcg_exit_queue+0x35/0x40 [] blk_release_queue+0x26/0xd0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] blk_put_queue+0x15/0x20 [] scsi_device_dev_release_usercontext+0x16b/0x1c0 [] execute_in_process_context+0x89/0xa0 [] scsi_device_dev_release+0x1c/0x20 [] device_release+0x32/0xa0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] put_device+0x17/0x20 [] __scsi_remove_device+0xa9/0xe0 [] scsi_remove_device+0x2b/0x40 [] sdev_store_delete+0x27/0x30 [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x3e/0x50 [] kernfs_fop_write+0xe7/0x170 [] vfs_write+0xaf/0x1d0 [] SyS_write+0x4d/0xc0 [] system_call_fastpath+0x16/0x1b 776687bce42b ("block, blk-mq: draining can't be skipped even if bypass_depth was non-zero") made it easier to trigger this bug by making blk_queue_bypass_start() drain even when it loses the first bypass test to blk_cleanup_queue(); however, the bug has always been there even before the commit as blk_queue_bypass_start() could race against queue destruction, win the initial bypass test but perform the actual draining after blk_cleanup_queue() already destroyed all blkgs. Fix it by skippping calling into policy draining if all the blkgs are already gone. Signed-off-by: Tejun Heo Reported-by: Shirish Pargaonkar Reported-by: Sasha Levin Reported-by: Jet Chen Cc: stable@vger.kernel.org Tested-by: Shirish Pargaonkar Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'block') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b9f4cc494ece..28d227c5ca77 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -872,6 +872,13 @@ void blkcg_drain_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + /* + * @q could be exiting and already have destroyed all blkgs as + * indicated by NULL root_blkg. If so, don't confuse policies. + */ + if (!q->root_blkg) + return; + blk_throtl_drain(q); } -- cgit v1.2.3-59-g8ed1b From d97a86c170b4e432f76db072a827fe30b4d6f659 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Aug 2014 11:09:59 +0300 Subject: partitions: aix.c: off by one bug The lvip[] array has "state->limit" elements so the condition here should be >= instead of >. Fixes: 6ceea22bbbc8 ('partitions: add aix lvm partition support files') Signed-off-by: Dan Carpenter Acked-by: Philippe De Muyter Signed-off-by: Jens Axboe --- block/partitions/aix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/partitions/aix.c b/block/partitions/aix.c index 0a6ed546331d..f3ed7b2d89bf 100644 --- a/block/partitions/aix.c +++ b/block/partitions/aix.c @@ -253,7 +253,7 @@ int aix_partition(struct parsed_partitions *state) continue; } lv_ix = be16_to_cpu(p->lv_ix) - 1; - if (lv_ix > state->limit) { + if (lv_ix >= state->limit) { cur_lv_ix = -1; continue; } -- cgit v1.2.3-59-g8ed1b