From 934d9c23b4c7e31840a895ba4b7e88d6413c81f3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 28 Oct 2008 17:01:23 +1100 Subject: md: destroy partitions and notify udev when md array is stopped. md arrays are not currently destroyed when they are stopped - they remain in /sys/block. Last time I tried this I tripped over locking too much. A consequence of this is that udev doesn't remove anything from /dev. This is rather ugly. As an interim measure until proper device removal can be achieved, make sure all partitions are removed using the BLKRRPART ioctl, and send a KOBJ_CHANGE when an md array is stopped. Signed-off-by: NeilBrown --- drivers/md/md.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index b4162f6f1b79..9abf6ed16535 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3884,6 +3884,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) if (mode == 0) { mdk_rdev_t *rdev; struct list_head *tmp; + struct block_device *bdev; printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); @@ -3940,6 +3941,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->degraded = 0; mddev->barriers_work = 0; mddev->safemode = 0; + bdev = bdget_disk(mddev->gendisk, 0); + if (bdev) { + blkdev_ioctl(bdev, 0, BLKRRPART, 0); + bdput(bdev); + } + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); } else if (mddev->pers) printk(KERN_INFO "md: %s switched to read-only mode.\n", -- cgit v1.2.3-59-g8ed1b From b34578a48459ed1bd5396631aaa4a65d6bcc7726 Mon Sep 17 00:00:00 2001 From: Ilpo Jarvinen Date: Thu, 30 Oct 2008 13:33:07 +0000 Subject: dm raid1: fix do_failures Missing braces. Commit 1f965b1943 (dm raid1: separate region_hash interface part1) broke it. Signed-off-by: Ilpo Jarvinen Signed-off-by: Alasdair G Kergon Cc: Heinz Mauelshagen --- drivers/md/dm-raid1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 92dcc06832a4..9d7b53ed75b2 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -656,9 +656,10 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) return; if (!ms->log_failure) { - while ((bio = bio_list_pop(failures))) + while ((bio = bio_list_pop(failures))) { ms->in_sync = 0; dm_rh_mark_nosync(ms->rh, bio, bio->bi_size, 0); + } return; } -- cgit v1.2.3-59-g8ed1b From 60c856c8e2f57a3f69c505735ef66e3719ea0bd6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 30 Oct 2008 13:33:12 +0000 Subject: dm snapshot: fix register_snapshot deadlock register_snapshot() performs a GFP_KERNEL allocation while holding _origins_lock for write, but that could write out dirty pages onto a device that attempts to acquire _origins_lock for read, resulting in deadlock. So move the allocation up before taking the lock. This path is not performance-critical, so it doesn't matter that we allocate memory and free it if we find that we won't need it. 
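The sign-off and diff follow below. The fix is a standard instance of the allocate-before-lock idiom: do the blocking GFP_KERNEL allocation first, then take the lock, and throw the allocation away if the lookup shows it was not needed. A minimal sketch of the idiom, with hypothetical names (my_origin, my_lookup and my_insert stand in for the dm-snap.c equivalents; this is not the actual driver code):

#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/fs.h>

static DECLARE_RWSEM(my_lock);		/* plays the role of _origins_lock */

struct my_origin {
	struct block_device *bdev;
	struct list_head snapshots;
};

/* hypothetical helpers, corresponding to __lookup_origin/__insert_origin */
static struct my_origin *my_lookup(struct block_device *bdev);
static void my_insert(struct my_origin *o);

static int my_register(struct block_device *bdev)
{
	struct my_origin *o, *new_o;

	/*
	 * GFP_KERNEL may block and write out dirty pages, so it must not
	 * be used while holding a lock that the writeout path also takes.
	 */
	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
	if (!new_o)
		return -ENOMEM;

	down_write(&my_lock);
	o = my_lookup(bdev);
	if (o) {
		/* someone beat us to it; discarding the spare is cheap */
		kfree(new_o);
	} else {
		o = new_o;
		o->bdev = bdev;
		INIT_LIST_HEAD(&o->snapshots);
		my_insert(o);
	}
	up_write(&my_lock);

	return 0;
}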
Signed-off-by: Mikulas Patocka
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm-snap.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index b2d9d1ac28ad..746603b42f86 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -229,19 +229,21 @@ static void __insert_origin(struct origin *o)
  */
 static int register_snapshot(struct dm_snapshot *snap)
 {
-	struct origin *o;
+	struct origin *o, *new_o;
 	struct block_device *bdev = snap->origin->bdev;

+	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
+	if (!new_o)
+		return -ENOMEM;
+
 	down_write(&_origins_lock);
 	o = __lookup_origin(bdev);

-	if (!o) {
+	if (o)
+		kfree(new_o);
+	else {
 		/* New origin */
-		o = kmalloc(sizeof(*o), GFP_KERNEL);
-		if (!o) {
-			up_write(&_origins_lock);
-			return -ENOMEM;
-		}
+		o = new_o;

 		/* Initialise the struct */
 		INIT_LIST_HEAD(&o->snapshots);
-- cgit v1.2.3-59-g8ed1b

From 879129d208f725267366296b631aef31409cf304 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Thu, 30 Oct 2008 13:33:16 +0000
Subject: dm snapshot: wait for chunks in destructor

If there are several snapshots sharing an origin and one is removed
while the origin is being written to, the snapshot's mempool may get
deleted while elements are still referenced.

Prior to dm-snapshot-use-per-device-mempools.patch the pending
exceptions may still have been referenced after the snapshot was
destroyed, but this was not a problem because the shared mempool
was still there.

This patch fixes the problem by tracking the number of mempool
elements in use.

The scenario:
- You have an origin and two snapshots 1 and 2.
- Someone writes to the origin.
- It creates two exceptions in the snapshots; snapshot 1's will be the
  primary exception, and snapshot 2's pending_exception->primary_pe
  will point to the exception in snapshot 1.
- The exceptions are being relocated; relocation of exception 1
  finishes (but its pending_exception is still allocated, because it
  is referenced by an exception from snapshot 2).
- The user lvremoves snapshot 1 --- this calls just suspend (which
  does nothing) and the destructor. md->pending is zero (there is no
  I/O submitted to the snapshot by the md layer), so it won't help us.
- The destructor waits for kcopyd jobs to finish on snapshot 1 --- but
  there are none.
- The destructor on snapshot 1 cleans up everything.
- The relocation of the exception on snapshot 2 finishes and drops its
  reference on primary_pe. primary_pe points to the pending exception
  created for snapshot 1, so this frees memory into a non-existent
  mempool.
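Condensed to its essentials, the pattern the diff below introduces pairs an atomic in-use counter with a drain loop in the destructor. This is only an illustrative sketch, not the dm-snap.c code itself (names are hypothetical, and the barrier placement mirrors the patch):

#include <asm/atomic.h>
#include <linux/mempool.h>
#include <linux/sched.h>

static atomic_t in_use = ATOMIC_INIT(0);	/* pending_exceptions_count analogue */

static void *my_alloc(mempool_t *pool)
{
	void *e = mempool_alloc(pool, GFP_NOIO);

	atomic_inc(&in_use);		/* element is now outstanding */
	return e;
}

static void my_free(mempool_t *pool, void *e)
{
	mempool_free(e, pool);
	smp_mb__before_atomic_dec();	/* the free must be visible before the count drops */
	atomic_dec(&in_use);
}

static void my_destroy(mempool_t *pool)
{
	while (atomic_read(&in_use))	/* no element may outlive its pool */
		yield();
	smp_mb();			/* keep mempool_destroy() after the final read */
	mempool_destroy(pool);
}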
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 16 +++++++++++++++- drivers/md/dm-snap.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 746603b42f86..6c96db26b87c 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -370,6 +370,7 @@ static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snaps struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, GFP_NOIO); + atomic_inc(&s->pending_exceptions_count); pe->snap = s; return pe; @@ -377,7 +378,11 @@ static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snaps static void free_pending_exception(struct dm_snap_pending_exception *pe) { - mempool_free(pe, pe->snap->pending_pool); + struct dm_snapshot *s = pe->snap; + + mempool_free(pe, s->pending_pool); + smp_mb__before_atomic_dec(); + atomic_dec(&s->pending_exceptions_count); } static void insert_completed_exception(struct dm_snapshot *s, @@ -602,6 +607,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->valid = 1; s->active = 0; + atomic_set(&s->pending_exceptions_count, 0); init_rwsem(&s->lock); spin_lock_init(&s->pe_lock); s->ti = ti; @@ -728,6 +734,14 @@ static void snapshot_dtr(struct dm_target *ti) /* After this returns there can be no new kcopyd jobs. */ unregister_snapshot(s); + while (atomic_read(&s->pending_exceptions_count)) + yield(); + /* + * Ensure instructions in mempool_destroy aren't reordered + * before atomic_read. + */ + smp_mb(); + #ifdef CONFIG_DM_DEBUG for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index f07315fe2362..99c0106ede2d 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -160,6 +160,8 @@ struct dm_snapshot { mempool_t *pending_pool; + atomic_t pending_exceptions_count; + struct exception_table pending; struct exception_table complete; -- cgit v1.2.3-59-g8ed1b From cb3ac42b8af357fdd9ad838234245b39e5bdb7fe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 6 Nov 2008 17:28:01 +1100 Subject: md: revert the recent addition of a call to the BLKRRPART ioctl. It turns out that it is only safe to call blkdev_ioctl when the device is actually open (as ->bd_disk is set to NULL on last close). And it is quite possible for do_md_stop to be called when the device is not open. 
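As the next paragraph notes, the partition rescan is easy to do from userspace, where the device is necessarily open while the ioctl is issued. A minimal userspace sketch (the /dev/md0 path is hypothetical):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>	/* BLKRRPART */

int main(void)
{
	int fd = open("/dev/md0", O_RDONLY);	/* hypothetical array node */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* ask the kernel to re-read the (now empty) partition table */
	if (ioctl(fd, BLKRRPART) < 0)
		perror("BLKRRPART");
	close(fd);
	return 0;
}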
So discard the call to blkdev_ioctl(BLKRRPART) which was added in
commit 934d9c23b4c7e31840a895ba4b7e88d6413c81f3.

It is just as easy to call this ioctl from userspace when needed (on
mdadm -S), so leave it out of the kernel.

Signed-off-by: NeilBrown
---
 drivers/md/md.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9abf6ed16535..1b1d32694f6f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3884,7 +3884,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 	if (mode == 0) {
 		mdk_rdev_t *rdev;
 		struct list_head *tmp;
-		struct block_device *bdev;

 		printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));

@@ -3941,11 +3940,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		mddev->degraded = 0;
 		mddev->barriers_work = 0;
 		mddev->safemode = 0;
-		bdev = bdget_disk(mddev->gendisk, 0);
-		if (bdev) {
-			blkdev_ioctl(bdev, 0, BLKRRPART, 0);
-			bdput(bdev);
-		}
 		kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);

 	} else if (mddev->pers)
-- cgit v1.2.3-59-g8ed1b

From a53a6c85756339f82ff19e001e90cfba2d6299a8 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Thu, 6 Nov 2008 17:28:20 +1100
Subject: md: fix bug in raid10 recovery.

Adding a spare to a raid10 doesn't cause recovery to start.
This is due to a silly typo in commit
6c2fce2ef6b4821c21b5c42c7207cb9cf8c87eda and so is a bug in 2.6.27
and .28-rc.

Thanks to Thomas Backlund for bisecting to find this.

Cc: Thomas Backlund
Cc: stable@kernel.org
Signed-off-by: NeilBrown
---
 drivers/md/raid10.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index da5129a24b18..970a96ef9b18 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1137,7 +1137,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 	if (!enough(conf))
 		return -EINVAL;

-	if (rdev->raid_disk)
+	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;

 	if (rdev->saved_raid_disk >= 0 &&
-- cgit v1.2.3-59-g8ed1b

From f1cd14ae52985634d0389e934eba25b5ecf24565 Mon Sep 17 00:00:00 2001
From: Andre Noll
Date: Thu, 6 Nov 2008 19:41:24 +1100
Subject: md: linear: Fix a division by zero bug for very small arrays.

We currently oops with a divide error on starting a linear software
raid array consisting of at least two very small (< 500K) devices.

The bug is caused by the calculation of the hash table size, which
tries to compute sector_div(sz, base) with "base" being zero due to
the small size of the component devices of the array.

Fix this by requiring the hash spacing to be at least one, which
implies that "base" is also non-zero. This bug has existed since
about 2.6.14.

Cc: stable@kernel.org
Signed-off-by: Andre Noll
Signed-off-by: NeilBrown
---
 drivers/md/linear.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/md')

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 190147c79e79..3b90c5c924ec 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -148,6 +148,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)

 	min_sectors = conf->array_sectors;
 	sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *));
+	if (min_sectors == 0)
+		min_sectors = 1;

 	/* min_sectors is the minimum spacing that will fit the hash
 	 * table in one PAGE. This may be much smaller than needed.
-- cgit v1.2.3-59-g8ed1b

From 18776c7316545482a02bfaa2629a2aa1afc48357 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Thu, 13 Nov 2008 23:38:52 +0000
Subject: dm raid1: flush workqueue before destruction

We queue work on the keventd queue, so that queue must be flushed in
the destructor. Otherwise, keventd could access mirror_set after it
has been freed.

Signed-off-by: Mikulas Patocka
Signed-off-by: Alasdair G Kergon
Cc: stable@kernel.org
---
 drivers/md/dm-raid1.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9d7b53ed75b2..ec43f9fa4b2a 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1032,6 +1032,7 @@ static void mirror_dtr(struct dm_target *ti)

 	del_timer_sync(&ms->timer);
 	flush_workqueue(ms->kmirrord_wq);
+	flush_scheduled_work();
 	dm_kcopyd_client_destroy(ms->kcopyd_client);
 	destroy_workqueue(ms->kmirrord_wq);
 	free_context(ms, ti, ms->nr_mirrors);
-- cgit v1.2.3-59-g8ed1b

From 6edebdee48729ab4ba564bbfcb8dbf6a6cd68a39 Mon Sep 17 00:00:00 2001
From: Heinz Mauelshagen
Date: Thu, 13 Nov 2008 23:38:56 +0000
Subject: dm stripe: fix init failure

Don't proceed if dm_stripe_init() fails to register itself as a dm
target.

Signed-off-by: Heinz Mauelshagen
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm-stripe.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index a2d068dbe9e2..9e4ef88d421e 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -320,8 +320,10 @@ int __init dm_stripe_init(void)
 	int r;

 	r = dm_register_target(&stripe_target);
-	if (r < 0)
+	if (r < 0) {
 		DMWARN("target registration failed");
+		return r;
+	}

 	kstriped = create_singlethread_workqueue("kstriped");
 	if (!kstriped) {
-- cgit v1.2.3-59-g8ed1b

From b81aa1c79201cb424114fd198607951900babe18 Mon Sep 17 00:00:00 2001
From: Chandra Seetharaman
Date: Thu, 13 Nov 2008 23:39:00 +0000
Subject: dm mpath: avoid attempting to activate null path

Path activation code is called even when the pgpath is NULL. This
could lead to a panic in activate_path(). Such a panic has been seen
in the -rt kernel.

This problem existed before pg_init() was moved to a workqueue.

Signed-off-by: Chandra Seetharaman
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm-mpath.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 4840733cd903..58b1015260fa 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -441,13 +441,13 @@ static void process_queued_ios(struct work_struct *work)
 		__choose_pgpath(m);

 	pgpath = m->current_pgpath;
-	m->pgpath_to_activate = m->current_pgpath;

 	if ((pgpath && !m->queue_io) ||
 	    (!pgpath && !m->queue_if_no_path))
 		must_queue = 0;

-	if (m->pg_init_required && !m->pg_init_in_progress) {
+	if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
+		m->pgpath_to_activate = pgpath;
 		m->pg_init_count++;
 		m->pg_init_required = 0;
 		m->pg_init_in_progress = 1;
-- cgit v1.2.3-59-g8ed1b

From 14e98c5ca8bed825f65cbf11cb0ffd2c09dac2f4 Mon Sep 17 00:00:00 2001
From: Chandra Seetharaman
Date: Thu, 13 Nov 2008 23:39:06 +0000
Subject: dm mpath: warn if args ignored

Currently dm ignores the parameters provided to hardware handlers
without any notification to the user. This patch prints a warning
message so that the user knows the arguments are ignored.
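Returning to the raid1 destructor fix earlier in this series: the rule it applies is that any deferred work that can dereference an object must be flushed before the object is freed, including work placed on the shared keventd queue by schedule_work(). A sketch of the teardown ordering, with a hypothetical context structure (not the actual mirror_dtr() code):

#include <linux/workqueue.h>
#include <linux/timer.h>
#include <linux/slab.h>

struct my_mirror_set {
	struct timer_list timer;	/* may re-arm deferred work */
	struct workqueue_struct *wq;	/* private queue, like kmirrord_wq */
};

static void my_dtr(struct my_mirror_set *ms)
{
	del_timer_sync(&ms->timer);	/* stop anything that could requeue work */
	flush_workqueue(ms->wq);	/* drain the private queue */
	flush_scheduled_work();		/* drain keventd: schedule_work() items
					 * could otherwise still reference ms */
	destroy_workqueue(ms->wq);
	kfree(ms);
}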
Signed-off-by: Chandra Seetharaman
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm-mpath.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers/md')

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 58b1015260fa..3d7f4923cd13 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -708,6 +708,10 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
 		m->hw_handler_name = NULL;
 		return -EINVAL;
 	}
+
+	if (hw_argc > 1)
+		DMWARN("Ignoring user-specified arguments for "
+		       "hardware handler \"%s\"", m->hw_handler_name);
 	consume(as, hw_argc - 1);

 	return 0;
-- cgit v1.2.3-59-g8ed1b

From d221d2e77696e70e94b13989ea15db2ba5b34f8e Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Thu, 13 Nov 2008 23:39:10 +0000
Subject: dm: move pending queue wake_up end_io_acct

This doesn't fix any bug; it just moves the wake_up to immediately
after the decrement of md->pending, for better code readability.

Anyone manipulating md->pending must wake up the queue when the count
reaches zero, so move the wakeup as close to the decrement as
possible.

Signed-off-by: Mikulas Patocka
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6963ad148408..dc25d8a07bc7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -375,7 +375,7 @@ static void start_io_acct(struct dm_io *io)
 	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
 }

-static int end_io_acct(struct dm_io *io)
+static void end_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->bio;
@@ -391,7 +391,9 @@ static int end_io_acct(struct dm_io *io)
 	dm_disk(md)->part0.in_flight = pending =
 		atomic_dec_return(&md->pending);

-	return !pending;
+	/* nudge anyone waiting on suspend queue */
+	if (!pending)
+		wake_up(&md->wait);
 }

 /*
@@ -499,9 +501,7 @@ static void dec_pending(struct dm_io *io, int error)
 			spin_unlock_irqrestore(&io->md->pushback_lock, flags);
 		}

-		if (end_io_acct(io))
-			/* nudge anyone waiting on suspend queue */
-			wake_up(&io->md->wait);
+		end_io_acct(io);

 		if (io->error != DM_ENDIO_REQUEUE) {
 			blk_add_trace_bio(io->md->queue, io->bio,
-- cgit v1.2.3-59-g8ed1b

From 8a57dfc6f943c92b861c9a19b0c86ddcb2aba768 Mon Sep 17 00:00:00 2001
From: Chandra Seetharaman
Date: Thu, 13 Nov 2008 23:39:14 +0000
Subject: dm: avoid destroying table in dm_any_congested

dm_any_congested() just checks for DMF_BLOCK_IO and has no code to
make sure that suspend waits for dm_any_congested() to complete. This
patch adds such a check.

Without it, a race can occur with dm_table_put() attempting to
destroy the table in the wrong thread, the one running
dm_any_congested(), which is meant to be quick and return
immediately.

Two examples of problems:
1. Sleeping functions called from congested code, the caller of which
   holds a spin lock.
2. An ABBA deadlock between pdflush and multipathd. The two locks in
   contention are inode lock and kernel lock.
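The fix below brackets the congestion check with the same md->pending counter that the suspend path already waits on, so the table cannot be torn down while a check is in flight. Reduced to a sketch with hypothetical names (the actual dm.c change follows):

#include <asm/atomic.h>
#include <linux/wait.h>

static atomic_t pending = ATOMIC_INIT(0);	/* md->pending analogue */
static DECLARE_WAIT_QUEUE_HEAD(waiters);	/* md->wait analogue */

static int my_congested(int bdi_bits)
{
	int r = bdi_bits;

	atomic_inc(&pending);		/* hold off suspend while we look */
	/* ... briefly inspect state that suspend would otherwise tear down ... */
	if (!atomic_dec_return(&pending))
		wake_up(&waiters);	/* last holder out nudges any waiter */

	return r;
}

static void my_wait_until_idle(void)
{
	/* suspend side: sleep until all in-flight checks have drained */
	wait_event(waiters, atomic_read(&pending) == 0);
}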
Signed-off-by: Chandra Seetharaman
Signed-off-by: Mikulas Patocka
Signed-off-by: Alasdair G Kergon
---
 drivers/md/dm.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dc25d8a07bc7..c99e4728ff41 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -937,16 +937,24 @@ static void dm_unplug_all(struct request_queue *q)

 static int dm_any_congested(void *congested_data, int bdi_bits)
 {
-	int r;
-	struct mapped_device *md = (struct mapped_device *) congested_data;
-	struct dm_table *map = dm_get_table(md);
+	int r = bdi_bits;
+	struct mapped_device *md = congested_data;
+	struct dm_table *map;

-	if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
-		r = bdi_bits;
-	else
-		r = dm_table_any_congested(map, bdi_bits);
+	atomic_inc(&md->pending);
+
+	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+		map = dm_get_table(md);
+		if (map) {
+			r = dm_table_any_congested(map, bdi_bits);
+			dm_table_put(map);
+		}
+	}
+
+	if (!atomic_dec_return(&md->pending))
+		/* nudge anyone waiting on suspend queue */
+		wake_up(&md->wait);

-	dm_table_put(map);
 	return r;
 }
-- cgit v1.2.3-59-g8ed1b

From 0e435ac26e3f951d83338ed3d4ab7dc0fe0055bc Mon Sep 17 00:00:00 2001
From: Milan Broz
Date: Wed, 3 Dec 2008 12:55:08 +0100
Subject: block: fix setting of max_segment_size and seg_boundary mask

Fix the setting of max_segment_size and seg_boundary mask for stacked
md/dm devices.

When stacking devices (LVM over MD over SCSI), some of the request
queue parameters are not set up correctly in some cases by default,
namely max_segment_size and seg_boundary mask.

If you create an MD device over SCSI, these attributes are zeroed.

The problem arises when another device-mapper mapping is stacked over
this one - queue attributes are then set in DM this way:

request_queue   max_segment_size   seg_boundary_mask
SCSI            65536              0xffffffff
MD RAID1        0                  0
LVM             65536              -1 (64bit)

Unfortunately, bio_add_page() (resp. bio_phys_segments()) calculates
the number of physical segments according to these parameters. During
generic_make_request() the segment count is recalculated and can
increase the bio->bi_phys_segments count over the allowed limit
(after bio_clone() in a stacking operation).

This is especially a problem in the CCISS driver, where it produces
an OOPS here:

BUG_ON(creq->nr_phys_segments > MAXSGENTRIES);

(MAXSGENTRIES is 31 by default.)

Sometimes even this command is enough to cause the oops:

dd iflag=direct if=/dev// of=/dev/null bs=128000 count=10

This command generates bios with 250 sectors, allocated in 32
4k-pages (the last page uses only 1024 bytes).

At the LVM layer, it allocates a bio with 31 segments (still OK for
CCISS); unfortunately, at a lower layer it is recalculated to 32
segments, which violates the CCISS restriction and triggers the
BUG_ON().

The patch tries to fix it by:

* initializing the attributes above in the request queue constructor
  blk_queue_make_request()

* making sure that blk_queue_stack_limits() inherits the setting
  (DM uses its own function to set the limits because
  blk_queue_stack_limits() was introduced later. It should probably
  switch to the generic stack limit function too.)
* setting the default seg_boundary value in one place (blkdev.h)

* using this mask as the default in DM (instead of -1, which differs
  on 64-bit)

Bugs related to this:
https://bugzilla.redhat.com/show_bug.cgi?id=471639
http://bugzilla.kernel.org/show_bug.cgi?id=8672

Signed-off-by: Milan Broz
Reviewed-by: Alasdair G Kergon
Cc: Neil Brown
Cc: FUJITA Tomonori
Cc: Tejun Heo
Cc: Mike Miller
Signed-off-by: Jens Axboe
---
 block/blk-core.c       | 2 +-
 block/blk-settings.c   | 4 ++++
 drivers/md/dm-table.c  | 2 +-
 include/linux/blkdev.h | 2 ++
 4 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'drivers/md')

diff --git a/block/blk-core.c b/block/blk-core.c
index 7a779d7c69c9..c36aa98fafa3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -592,7 +592,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 			   1 << QUEUE_FLAG_STACKABLE);
 	q->queue_lock		= lock;

-	blk_queue_segment_boundary(q, 0xffffffff);
+	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);

 	blk_queue_make_request(q, __make_request);
 	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 41392fbe19ff..afa55e14e278 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -125,6 +125,9 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->nr_requests = BLKDEV_MAX_RQ;
 	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
 	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
+	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
+	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
+
 	q->make_request_fn = mfn;
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -314,6 +317,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 	/* zero is "infinity" */
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);

 	t->max_phys_segments = min(t->max_phys_segments, b->max_phys_segments);
 	t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index a63161aec487..04e5fd742c2c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -668,7 +668,7 @@ static void check_for_valid_limits(struct io_restrictions *rs)
 	if (!rs->max_segment_size)
 		rs->max_segment_size = MAX_SEGMENT_SIZE;
 	if (!rs->seg_boundary_mask)
-		rs->seg_boundary_mask = -1;
+		rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	if (!rs->bounce_pfn)
 		rs->bounce_pfn = -1;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9cc7cc5fdce1..6dcd30d806cd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -918,6 +918,8 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);

 #define MAX_SEGMENT_SIZE	65536

+#define BLK_SEG_BOUNDARY_MASK	0xFFFFFFFFUL
+
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)

 static inline int queue_hardsect_size(struct request_queue *q)
-- cgit v1.2.3-59-g8ed1b
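A final note on the stacking rule these hunks rely on: a limit of zero means "unset/infinity", so combining two queue levels must take the strictest non-zero value. A self-contained sketch of the idiom (my_limits and the locally defined my_min_not_zero() mirror, but are not, the kernel's own definitions):

#include <linux/kernel.h>	/* min() */

/* mirrors the kernel's min_not_zero(): zero means "no limit" */
#define my_min_not_zero(l, r) \
	((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r)))

struct my_limits {			/* hypothetical subset of request_queue limits */
	unsigned int max_segment_size;
	unsigned long seg_boundary_mask;
};

/* stack bottom device b's limits into top device t, keeping the stricter value */
static void my_stack_limits(struct my_limits *t, const struct my_limits *b)
{
	t->max_segment_size =
		my_min_not_zero(t->max_segment_size, b->max_segment_size);
	t->seg_boundary_mask =
		my_min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
}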