From 5f027a3bf184d1d36e68745f7cd3718a8b879cc0 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Feb 2015 14:09:12 +0000 Subject: dm thin: fix to consistently zero-fill reads to unprovisioned blocks It was always intended that a read to an unprovisioned block will return zeroes regardless of whether the pool is in read-only or read-write mode. thin_bio_map() was inconsistent with its handling of such reads when the pool is in read-only mode, it now properly zero-fills the bios it returns in response to unprovisioned block reads. Eliminate thin_bio_map()'s special read-only mode handling of -ENODATA and just allow the IO to be deferred to the worker which will result in pool->process_bio() handling the IO (which already properly zero-fills reads to unprovisioned blocks). Reported-by: Eric Sandeen Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-thin.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 654773cb1eee..921aafd12aee 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2358,17 +2358,6 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; case -ENODATA: - if (get_pool_mode(tc->pool) == PM_READ_ONLY) { - /* - * This block isn't provisioned, and we have no way - * of doing so. - */ - handle_unserviceable_bio(tc->pool, bio); - cell_defer_no_holder(tc, virt_cell); - return DM_MAPIO_SUBMITTED; - } - /* fall through */ - case -EWOULDBLOCK: thin_defer_cell(tc, virt_cell); return DM_MAPIO_SUBMITTED; -- cgit v1.2.3-59-g8ed1b From ab7c7bb6f4ab95dbca96fcfc4463cd69843e3e24 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Feb 2015 14:04:27 -0500 Subject: dm: hold suspend_lock while suspending device during device deletion __dm_destroy() must take the suspend_lock so that its presuspend and postsuspend calls do not race with an internal suspend. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 73f28802dc7a..e79d5ccfda64 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2638,10 +2638,16 @@ static void __dm_destroy(struct mapped_device *md, bool wait) if (dm_request_based(md)) flush_kthread_worker(&md->kworker); + /* + * Take suspend_lock so that presuspend and postsuspend methods + * do not race with internal suspend. + */ + mutex_lock(&md->suspend_lock); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } + mutex_unlock(&md->suspend_lock); /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ dm_put_live_table(md, srcu_idx); -- cgit v1.2.3-59-g8ed1b From b735fede8d957d9d255e9c5cf3964cfa59799637 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 26 Feb 2015 11:40:35 -0500 Subject: dm snapshot: suspend origin when doing exception handover In the function snapshot_resume we perform exception store handover. If there is another active snapshot target, the exception store is moved from this target to the target that is being resumed. The problem is that if there is some pending exception, it will point to an incorrect exception store after that handover, causing a crash due to dm-snap-persistent.c:get_exception()'s BUG_ON. This bug can be triggered by repeatedly changing snapshot permissions with "lvchange -p r" and "lvchange -p rw" while there are writes on the associated origin device. To fix this bug, we must suspend the origin device when doing the exception store handover to make sure that there are no pending exceptions: - introduce _origin_hash that keeps track of dm_origin structures. - introduce functions __lookup_dm_origin, __insert_dm_origin and __remove_dm_origin that manipulate the origin hash. - modify snapshot_resume so that it calls dm_internal_suspend_fast() and dm_internal_resume_fast() on the origin device. NOTE to stable@ people: When backporting to kernels 3.12-3.18, use dm_internal_suspend and dm_internal_resume instead of dm_internal_suspend_fast and dm_internal_resume_fast. When backporting to kernels older than 3.12, you need to pick functions dm_internal_suspend and dm_internal_resume from the commit fd2ed4d252701d3bbed4cd3e3d267ad469bb832a. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-snap.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++----- drivers/md/dm.c | 2 ++ 2 files changed, 86 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 8b204ae216ab..c2bf822bad6f 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -20,6 +20,8 @@ #include #include +#include "dm.h" + #include "dm-exception-store.h" #define DM_MSG_PREFIX "snapshots" @@ -290,6 +292,16 @@ struct origin { struct list_head snapshots; }; +/* + * This structure is allocated for each origin target + */ +struct dm_origin { + struct dm_dev *dev; + struct dm_target *ti; + unsigned split_boundary; + struct list_head hash_list; +}; + /* * Size of the hash table for origin volumes. If we make this * the size of the minors list then it should be nearly perfect @@ -297,6 +309,7 @@ struct origin { #define ORIGIN_HASH_SIZE 256 #define ORIGIN_MASK 0xFF static struct list_head *_origins; +static struct list_head *_dm_origins; static struct rw_semaphore _origins_lock; static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done); @@ -310,12 +323,22 @@ static int init_origin_hash(void) _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), GFP_KERNEL); if (!_origins) { - DMERR("unable to allocate memory"); + DMERR("unable to allocate memory for _origins"); return -ENOMEM; } - for (i = 0; i < ORIGIN_HASH_SIZE; i++) INIT_LIST_HEAD(_origins + i); + + _dm_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), + GFP_KERNEL); + if (!_dm_origins) { + DMERR("unable to allocate memory for _dm_origins"); + kfree(_origins); + return -ENOMEM; + } + for (i = 0; i < ORIGIN_HASH_SIZE; i++) + INIT_LIST_HEAD(_dm_origins + i); + init_rwsem(&_origins_lock); return 0; @@ -324,6 +347,7 @@ static int init_origin_hash(void) static void exit_origin_hash(void) { kfree(_origins); + kfree(_dm_origins); } static unsigned origin_hash(struct block_device *bdev) @@ -350,6 +374,30 @@ static void __insert_origin(struct origin *o) list_add_tail(&o->hash_list, sl); } +static struct dm_origin *__lookup_dm_origin(struct block_device *origin) +{ + struct list_head *ol; + struct dm_origin *o; + + ol = &_dm_origins[origin_hash(origin)]; + list_for_each_entry (o, ol, hash_list) + if (bdev_equal(o->dev->bdev, origin)) + return o; + + return NULL; +} + +static void __insert_dm_origin(struct dm_origin *o) +{ + struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)]; + list_add_tail(&o->hash_list, sl); +} + +static void __remove_dm_origin(struct dm_origin *o) +{ + list_del(&o->hash_list); +} + /* * _origins_lock must be held when calling this function. * Returns number of snapshots registered using the supplied cow device, plus: @@ -1841,8 +1889,20 @@ static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = ti->private; struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; + struct dm_origin *o; + struct mapped_device *origin_md = NULL; down_read(&_origins_lock); + + o = __lookup_dm_origin(s->origin->bdev); + if (o) + origin_md = dm_table_get_md(o->ti->table); + if (origin_md == dm_table_get_md(ti->table)) + origin_md = NULL; + + if (origin_md) + dm_internal_suspend_fast(origin_md); + (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest) { down_write(&snap_src->lock); @@ -1851,6 +1911,10 @@ static void snapshot_resume(struct dm_target *ti) up_write(&snap_dest->lock); up_write(&snap_src->lock); } + + if (origin_md) + dm_internal_resume_fast(origin_md); + up_read(&_origins_lock); /* Now we have correct chunk size, reregister */ @@ -2133,11 +2197,6 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, * Origin: maps a linear range of a device, with hooks for snapshotting. */ -struct dm_origin { - struct dm_dev *dev; - unsigned split_boundary; -}; - /* * Construct an origin mapping: * The context for an origin is merely a 'struct dm_dev *' @@ -2166,6 +2225,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_open; } + o->ti = ti; ti->private = o; ti->num_flush_bios = 1; @@ -2180,6 +2240,7 @@ bad_alloc: static void origin_dtr(struct dm_target *ti) { struct dm_origin *o = ti->private; + dm_put_device(ti, o->dev); kfree(o); } @@ -2216,6 +2277,19 @@ static void origin_resume(struct dm_target *ti) struct dm_origin *o = ti->private; o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev); + + down_write(&_origins_lock); + __insert_dm_origin(o); + up_write(&_origins_lock); +} + +static void origin_postsuspend(struct dm_target *ti) +{ + struct dm_origin *o = ti->private; + + down_write(&_origins_lock); + __remove_dm_origin(o); + up_write(&_origins_lock); } static void origin_status(struct dm_target *ti, status_type_t type, @@ -2258,12 +2332,13 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 8, 1}, + .version = {1, 9, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, .map = origin_map, .resume = origin_resume, + .postsuspend = origin_postsuspend, .status = origin_status, .merge = origin_merge, .iterate_devices = origin_iterate_devices, @@ -2271,7 +2346,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 12, 0}, + .version = {1, 13, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e79d5ccfda64..6e2b2e97abe9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -3121,6 +3121,7 @@ void dm_internal_suspend_fast(struct mapped_device *md) flush_workqueue(md->wq); dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); } +EXPORT_SYMBOL_GPL(dm_internal_suspend_fast); void dm_internal_resume_fast(struct mapped_device *md) { @@ -3132,6 +3133,7 @@ void dm_internal_resume_fast(struct mapped_device *md) done: mutex_unlock(&md->suspend_lock); } +EXPORT_SYMBOL_GPL(dm_internal_resume_fast); /*----------------------------------------------------------------- * Event notification. -- cgit v1.2.3-59-g8ed1b From 09ee96b21456883e108c3b00597bb37ec512151b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 26 Feb 2015 11:41:28 -0500 Subject: dm snapshot: suspend merging snapshot when doing exception handover The "dm snapshot: suspend origin when doing exception handover" commit fixed a exception store handover bug associated with pending exceptions to the "snapshot-origin" target. However, a similar problem exists in snapshot merging. When snapshot merging is in progress, we use the target "snapshot-merge" instead of "snapshot-origin". Consequently, during exception store handover, we must find the snapshot-merge target and suspend its associated mapped_device. To avoid lockdep warnings, the target must be suspended and resumed without holding _origins_lock. Introduce a dm_hold() function that grabs a reference on a mapped_device, but unlike dm_get(), it doesn't crash if the device has the DMF_FREEING flag set, it returns an error in this case. In snapshot_resume() we grab the reference to the origin device using dm_hold() while holding _origins_lock (_origins_lock guarantees that the device won't disappear). Then we release _origins_lock, suspend the device and grab _origins_lock again. NOTE to stable@ people: When backporting to kernels 3.18 and older, use dm_internal_suspend and dm_internal_resume instead of dm_internal_suspend_fast and dm_internal_resume_fast. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-snap.c | 35 +++++++++++++++++++++++++++++------ drivers/md/dm.c | 13 +++++++++++++ include/linux/device-mapper.h | 1 + 3 files changed, 43 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c2bf822bad6f..f83a0f3fc365 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1888,20 +1888,39 @@ static int snapshot_preresume(struct dm_target *ti) static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = ti->private; - struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL; struct dm_origin *o; struct mapped_device *origin_md = NULL; + bool must_restart_merging = false; down_read(&_origins_lock); o = __lookup_dm_origin(s->origin->bdev); if (o) origin_md = dm_table_get_md(o->ti->table); + if (!origin_md) { + (void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging); + if (snap_merging) + origin_md = dm_table_get_md(snap_merging->ti->table); + } if (origin_md == dm_table_get_md(ti->table)) origin_md = NULL; + if (origin_md) { + if (dm_hold(origin_md)) + origin_md = NULL; + } - if (origin_md) + up_read(&_origins_lock); + + if (origin_md) { dm_internal_suspend_fast(origin_md); + if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) { + must_restart_merging = true; + stop_merge(snap_merging); + } + } + + down_read(&_origins_lock); (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest) { @@ -1912,11 +1931,15 @@ static void snapshot_resume(struct dm_target *ti) up_write(&snap_src->lock); } - if (origin_md) - dm_internal_resume_fast(origin_md); - up_read(&_origins_lock); + if (origin_md) { + if (must_restart_merging) + start_merge(snap_merging); + dm_internal_resume_fast(origin_md); + dm_put(origin_md); + } + /* Now we have correct chunk size, reregister */ reregister_snapshot(s); @@ -2360,7 +2383,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6e2b2e97abe9..9b641b38b857 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2616,6 +2616,19 @@ void dm_get(struct mapped_device *md) BUG_ON(test_bit(DMF_FREEING, &md->flags)); } +int dm_hold(struct mapped_device *md) +{ + spin_lock(&_minor_lock); + if (test_bit(DMF_FREEING, &md->flags)) { + spin_unlock(&_minor_lock); + return -EBUSY; + } + dm_get(md); + spin_unlock(&_minor_lock); + return 0; +} +EXPORT_SYMBOL_GPL(dm_hold); + const char *dm_device_name(struct mapped_device *md) { return md->name; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 2646aed1d3fe..fd23978d93fe 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -375,6 +375,7 @@ int dm_create(int minor, struct mapped_device **md); */ struct mapped_device *dm_get_md(dev_t dev); void dm_get(struct mapped_device *md); +int dm_hold(struct mapped_device *md); void dm_put(struct mapped_device *md); /* -- cgit v1.2.3-59-g8ed1b From e5db29806b99ce2b2640d2e4d4fcb983cea115c5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 27 Feb 2015 10:44:38 -0800 Subject: dm io: deal with wandering queue limits when handling REQ_DISCARD and REQ_WRITE_SAME Since it's possible for the discard and write same queue limits to change while the upper level command is being sliced and diced, fix up both of them (a) to reject IO if the special command is unsupported at the start of the function and (b) read the limits once and let the commands error out on their own if the status happens to change. Signed-off-by: Darrick J. Wong Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-io.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 37de0173b6d2..74adcd2c967e 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -289,9 +289,16 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, struct request_queue *q = bdev_get_queue(where->bdev); unsigned short logical_block_size = queue_logical_block_size(q); sector_t num_sectors; + unsigned int uninitialized_var(special_cmd_max_sectors); - /* Reject unsupported discard requests */ - if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) { + /* + * Reject unsupported discard and write same requests. + */ + if (rw & REQ_DISCARD) + special_cmd_max_sectors = q->limits.max_discard_sectors; + else if (rw & REQ_WRITE_SAME) + special_cmd_max_sectors = q->limits.max_write_same_sectors; + if ((rw & (REQ_DISCARD | REQ_WRITE_SAME)) && special_cmd_max_sectors == 0) { dec_count(io, region, -EOPNOTSUPP); return; } @@ -317,7 +324,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; remaining -= num_sectors; } else if (rw & REQ_WRITE_SAME) { @@ -326,7 +333,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, */ dp->get_page(dp, &page, &len, &offset); bio_add_page(bio, page, logical_block_size, offset); - num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; offset = 0; -- cgit v1.2.3-59-g8ed1b