From ed209584c38fb74b7eecc03e5b1bfe674e591bd8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 24 Apr 2012 10:23:14 +1000 Subject: md: don't call ->add_disk unless there is good reason. Commit 7bfec5f35c68121e7b18 md/raid5: If there is a spare and a want_replacement device, start replacement. cause md_check_recovery to call ->add_disk much more often. Instead of only when the array is degraded, it is now called whenever md_check_recovery finds anything useful to do, which includes updating the metadata for clean<->dirty transition. This causes unnecessary work, and causes info messages from ->add_disk to be reported much too often. So refine md_check_recovery to only do any actual recovery checking (including ->add_disk) if MD_RECOVERY_NEEDED is set. This fix is suitable for 3.3.y: Cc: stable@vger.kernel.org Reported-by: Jan Ceuleers Signed-off-by: NeilBrown --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index b572e1e386ce..8beb19c3bb44 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7560,14 +7560,14 @@ void md_check_recovery(struct mddev *mddev) * any transients in the value of "sync_action". */ set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); /* Clear some bits that don't mean anything, but * might be left set */ clear_bit(MD_RECOVERY_INTR, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); - if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) + if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || + test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) goto unlock; /* no recovery is running. * remove any failed drives, then -- cgit v1.2.3-59-g8ed1b From 30b8aa9172dfeaac6d77897c67ee9f9fc574cdbb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 24 Apr 2012 10:23:16 +1000 Subject: md: fix possible corruption of array metadata on shutdown. commit c744a65c1e2d59acc54333ce8 md: don't set md arrays to readonly on shutdown. removed the possibility of a 'BUG' when data is written to an array that has just been switched to read-only, but also introduced the possibility that the array metadata could be corrupted. If, when md_notify_reboot gets the mddev lock, the array is in a state where it is assembled but hasn't been started (as can happen if the personality module is not available, or in other unusual situations), then incorrect metadata will be written out making it impossible to re-assemble the array. So only call __md_stop_writes() if the array has actually been activated. This patch is needed for any stable kernel which has had the above commit applied. Cc: stable@vger.kernel.org Reported-by: Christoph Nelles Signed-off-by: NeilBrown --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 8beb19c3bb44..477eb2e180c0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8140,7 +8140,8 @@ static int md_notify_reboot(struct notifier_block *this, for_each_mddev(mddev, tmp) { if (mddev_trylock(mddev)) { - __md_stop_writes(mddev); + if (mddev->pers) + __md_stop_writes(mddev); mddev->safemode = 2; mddev_unlock(mddev); } -- cgit v1.2.3-59-g8ed1b From 0d9f4f135eb6dea06bdcb7065b1e4ff78274a5e9 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Wed, 16 May 2012 04:06:14 -0500 Subject: MD: Add del_timer_sync to mddev_suspend (fix nasty panic) Use del_timer_sync to remove timer before mddev_suspend finishes. 
We don't want a timer going off after an mddev_suspend is called. This is especially true with device-mapper, since it can call the destructor function immediately following a suspend. This results in the removal (kfree) of the structures upon which the timer depends - resulting in a very ugly panic. Therefore, we add a del_timer_sync to mddev_suspend to prevent this. Cc: stable@vger.kernel.org Signed-off-by: NeilBrown --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 477eb2e180c0..01233d855eb2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -391,6 +391,8 @@ void mddev_suspend(struct mddev *mddev) synchronize_rcu(); wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); mddev->pers->quiesce(mddev, 1); + + del_timer_sync(&mddev->safemode_timer); } EXPORT_SYMBOL_GPL(mddev_suspend); -- cgit v1.2.3-59-g8ed1b From b5e1b8cee7ad58a15d2fa79bcd7946acb592602d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 21 May 2012 09:26:59 +1000 Subject: md: using GFP_NOIO to allocate bio for flush request A flush request is usually issued in transaction commit code path, so using GFP_KERNEL to allocate memory for flush request bio falls into the classic deadlock issue. This is suitable for any -stable kernel to which it applies as it avoids a possible deadlock. Cc: stable@vger.kernel.org Signed-off-by: Shaohua Li Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 01233d855eb2..2b30ffdb81b2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -452,7 +452,7 @@ static void submit_flushes(struct work_struct *ws) atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev); + bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); bi->bi_end_io = md_end_flush; bi->bi_private = rdev; bi->bi_bdev = rdev->bdev; -- cgit v1.2.3-59-g8ed1b From 2c810cddc44d6f95cef75df3f07fc0850ff92417 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 May 2012 09:27:00 +1000 Subject: md: allow a reshape operation to be reversed. Currently a reshape operation always progresses from the start of the array to the end unless the number of devices is being reduced, in which case it progressed in the opposite direction. To reverse a partial reshape which changes the number of devices you can stop the array and re-assemble with the raid-disks numbers reversed and it will undo. However for a reshape that does not change the number of devices it is not possible to reverse the reshape in the middle - you have to wait until it completes. So add a 'reshape_direction' attribute with is either 'forwards' or 'backwards' and can be explicitly set when delta_disks is zero. This will become more important when we allow the data_offset to change in a reshape. Then the explicit statement of what direction is being used will be more useful. This can be enabled in raid5 trivially as it already supports reverse reshape and just needs to use a different trigger to request it. 
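As a rough illustration of the rule the new attribute enforces, the sketch below models it in plain user-space C. The struct and return convention are simplified stand-ins, not the kernel's struct mddev or errno values, and the real reshape_direction_store() in the diff below additionally refuses the change for persistent v0.90 metadata, which has no way to record a backwards reshape when delta_disks is zero.

#include <stdio.h>
#include <string.h>

/* Illustrative model only; not the kernel's struct mddev. */
struct md_model {
	int delta_disks;	/* change in number of raid disks */
	int reshape_backwards;	/* 0 = forwards, 1 = backwards */
};

/* Returns 0 on success, -1 when the request must be rejected. */
static int set_reshape_direction(struct md_model *md, const char *buf)
{
	int backwards;

	if (strcmp(buf, "forwards") == 0)
		backwards = 0;
	else if (strcmp(buf, "backwards") == 0)
		backwards = 1;
	else
		return -1;		/* unrecognised value */

	if (md->reshape_backwards == backwards)
		return 0;		/* nothing to change */

	/* An explicit direction is only accepted while delta_disks is 0;
	 * otherwise the direction is implied by the sign of delta_disks. */
	if (md->delta_disks)
		return -1;

	md->reshape_backwards = backwards;
	return 0;
}

int main(void)
{
	struct md_model md = { .delta_disks = 0, .reshape_backwards = 0 };

	if (set_reshape_direction(&md, "backwards") == 0)
		printf("reshape_direction: %s\n",
		       md.reshape_backwards ? "backwards" : "forwards");
	return 0;
}

In use this corresponds to writing "backwards" to the array's reshape_direction sysfs attribute before starting a reshape that keeps the same number of devices.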
Signed-off-by: NeilBrown --- drivers/md/md.c | 67 +++++++++++++++++++++++++++++++++++++++++++++-- drivers/md/md.h | 1 + drivers/md/raid5.c | 23 ++++++++-------- include/linux/raid/md_p.h | 7 ++++- 4 files changed, 84 insertions(+), 14 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 2b30ffdb81b2..44bb1d52dd4c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -607,6 +607,7 @@ void mddev_init(struct mddev *mddev) init_waitqueue_head(&mddev->sb_wait); init_waitqueue_head(&mddev->recovery_wait); mddev->reshape_position = MaxSector; + mddev->reshape_backwards = 0; mddev->resync_min = 0; mddev->resync_max = MaxSector; mddev->level = LEVEL_NONE; @@ -1185,6 +1186,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->events = ev1; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; + mddev->reshape_backwards = 0; if (mddev->minor_version >= 91) { mddev->reshape_position = sb->reshape_position; @@ -1192,6 +1194,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->new_level = sb->new_level; mddev->new_layout = sb->new_layout; mddev->new_chunk_sectors = sb->new_chunk >> 9; + if (mddev->delta_disks < 0) + mddev->reshape_backwards = 1; } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; @@ -1645,7 +1649,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->events = ev1; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 1024 >> 9; - + mddev->reshape_backwards = 0; + mddev->recovery_cp = le64_to_cpu(sb->resync_offset); memcpy(mddev->uuid, sb->set_uuid, 16); @@ -1662,6 +1667,11 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->new_level = le32_to_cpu(sb->new_level); mddev->new_layout = le32_to_cpu(sb->new_layout); mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk); + if (mddev->delta_disks < 0 || + (mddev->delta_disks == 0 && + (le32_to_cpu(sb->feature_map) + & MD_FEATURE_RESHAPE_BACKWARDS))) + mddev->reshape_backwards = 1; } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; @@ -1781,6 +1791,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->delta_disks = cpu_to_le32(mddev->delta_disks); sb->new_level = cpu_to_le32(mddev->new_level); sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors); + if (mddev->delta_disks == 0 && + mddev->reshape_backwards) + sb->feature_map + |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS); } if (rdev->badblocks.count == 0) @@ -3419,6 +3433,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->raid_disks -= mddev->delta_disks; mddev->delta_disks = 0; + mddev->reshape_backwards = 0; module_put(pers->owner); printk(KERN_WARNING "md: %s: %s would not accept array\n", mdname(mddev), clevel); @@ -3492,6 +3507,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) mddev->layout = mddev->new_layout; mddev->chunk_sectors = mddev->new_chunk_sectors; mddev->delta_disks = 0; + mddev->reshape_backwards = 0; mddev->degraded = 0; if (mddev->pers->sync_request == NULL) { /* this is now an array without redundancy, so @@ -3585,6 +3601,7 @@ raid_disks_store(struct mddev *mddev, const char *buf, size_t len) int olddisks = mddev->raid_disks - mddev->delta_disks; mddev->delta_disks = n - olddisks; mddev->raid_disks = n; + mddev->reshape_backwards = (mddev->delta_disks < 0); } else mddev->raid_disks = n; return rv ? 
rv : len; @@ -4436,6 +4453,7 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len) return -EINVAL; mddev->reshape_position = new; mddev->delta_disks = 0; + mddev->reshape_backwards = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; mddev->new_chunk_sectors = mddev->chunk_sectors; @@ -4446,6 +4464,42 @@ static struct md_sysfs_entry md_reshape_position = __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, reshape_position_store); +static ssize_t +reshape_direction_show(struct mddev *mddev, char *page) +{ + return sprintf(page, "%s\n", + mddev->reshape_backwards ? "backwards" : "forwards"); +} + +static ssize_t +reshape_direction_store(struct mddev *mddev, const char *buf, size_t len) +{ + int backwards = 0; + if (cmd_match(buf, "forwards")) + backwards = 0; + else if (cmd_match(buf, "backwards")) + backwards = 1; + else + return -EINVAL; + if (mddev->reshape_backwards == backwards) + return len; + + /* check if we are allowed to change */ + if (mddev->delta_disks) + return -EBUSY; + + if (mddev->persistent && + mddev->major_version == 0) + return -EINVAL; + + mddev->reshape_backwards = backwards; + return len; +} + +static struct md_sysfs_entry md_reshape_direction = +__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show, + reshape_direction_store); + static ssize_t array_size_show(struct mddev *mddev, char *page) { @@ -4501,6 +4555,7 @@ static struct attribute *md_default_attrs[] = { &md_safe_delay.attr, &md_array_state.attr, &md_reshape_position.attr, + &md_reshape_direction.attr, &md_array_size.attr, &max_corr_read_errors.attr, NULL, @@ -5064,6 +5119,7 @@ static void md_clean(struct mddev *mddev) mddev->events = 0; mddev->can_decrease_events = 0; mddev->delta_disks = 0; + mddev->reshape_backwards = 0; mddev->new_level = LEVEL_NONE; mddev->new_layout = 0; mddev->new_chunk_sectors = 0; @@ -5888,6 +5944,7 @@ static int set_array_info(struct mddev * mddev, mdu_array_info_t *info) mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->new_layout = mddev->layout; mddev->delta_disks = 0; + mddev->reshape_backwards = 0; return 0; } @@ -5953,10 +6010,16 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) if (mddev->sync_thread || mddev->reshape_position != MaxSector) return -EBUSY; mddev->delta_disks = raid_disks - mddev->raid_disks; + if (mddev->delta_disks < 0) + mddev->reshape_backwards = 1; + else if (mddev->delta_disks > 0) + mddev->reshape_backwards = 0; rv = mddev->pers->check_reshape(mddev); - if (rv < 0) + if (rv < 0) { mddev->delta_disks = 0; + mddev->reshape_backwards = 0; + } return rv; } diff --git a/drivers/md/md.h b/drivers/md/md.h index 1c2063ccf48e..d51c0ca37777 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -262,6 +262,7 @@ struct mddev { sector_t reshape_position; int delta_disks, new_level, new_layout; int new_chunk_sectors; + int reshape_backwards; atomic_t plug_cnt; /* If device is expecting * more bios soon. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f351422938e0..0abbd3447cfb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3970,13 +3970,13 @@ static void make_request(struct mddev *mddev, struct bio * bi) * to check again. */ spin_lock_irq(&conf->device_lock); - if (mddev->delta_disks < 0 + if (mddev->reshape_backwards ? logical_sector < conf->reshape_progress : logical_sector >= conf->reshape_progress) { disks = conf->previous_raid_disks; previous = 1; } else { - if (mddev->delta_disks < 0 + if (mddev->reshape_backwards ? 
logical_sector < conf->reshape_safe : logical_sector >= conf->reshape_safe) { spin_unlock_irq(&conf->device_lock); @@ -4009,7 +4009,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) */ int must_retry = 0; spin_lock_irq(&conf->device_lock); - if (mddev->delta_disks < 0 + if (mddev->reshape_backwards ? logical_sector >= conf->reshape_progress : logical_sector < conf->reshape_progress) /* mismatch, need to try again */ @@ -4108,11 +4108,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk if (sector_nr == 0) { /* If restarting in the middle, skip the initial sectors */ - if (mddev->delta_disks < 0 && + if (mddev->reshape_backwards && conf->reshape_progress < raid5_size(mddev, 0, 0)) { sector_nr = raid5_size(mddev, 0, 0) - conf->reshape_progress; - } else if (mddev->delta_disks >= 0 && + } else if (!mddev->reshape_backwards && conf->reshape_progress > 0) sector_nr = conf->reshape_progress; sector_div(sector_nr, new_data_disks); @@ -4147,7 +4147,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk sector_div(readpos, data_disks); safepos = conf->reshape_safe; sector_div(safepos, data_disks); - if (mddev->delta_disks < 0) { + if (mddev->reshape_backwards) { writepos -= min_t(sector_t, reshape_sectors, writepos); readpos += reshape_sectors; safepos += reshape_sectors; @@ -4174,7 +4174,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk * Maybe that number should be configurable, but I'm not sure it is * worth it.... maybe it could be a multiple of safemode_delay??? */ - if ((mddev->delta_disks < 0 + if ((mddev->reshape_backwards ? (safepos > writepos && readpos < writepos) : (safepos < writepos && readpos > writepos)) || time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { @@ -4195,7 +4195,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } - if (mddev->delta_disks < 0) { + if (mddev->reshape_backwards) { BUG_ON(conf->reshape_progress == 0); stripe_addr = writepos; BUG_ON((mddev->dev_sectors & @@ -4239,7 +4239,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk list_add(&sh->lru, &stripes); } spin_lock_irq(&conf->device_lock); - if (mddev->delta_disks < 0) + if (mddev->reshape_backwards) conf->reshape_progress -= reshape_sectors * new_data_disks; else conf->reshape_progress += reshape_sectors * new_data_disks; @@ -5008,7 +5008,7 @@ static int run(struct mddev *mddev) mdname(mddev)); return -EINVAL; } - } else if (mddev->delta_disks < 0 + } else if (mddev->reshape_backwards ? 
(here_new * mddev->new_chunk_sectors <= here_old * mddev->chunk_sectors) : (here_new * mddev->new_chunk_sectors >= @@ -5535,7 +5535,7 @@ static int raid5_start_reshape(struct mddev *mddev) conf->chunk_sectors = mddev->new_chunk_sectors; conf->prev_algo = conf->algorithm; conf->algorithm = mddev->new_layout; - if (mddev->delta_disks < 0) + if (mddev->reshape_backwards) conf->reshape_progress = raid5_size(mddev, 0, 0); else conf->reshape_progress = 0; @@ -5663,6 +5663,7 @@ static void raid5_finish_reshape(struct mddev *mddev) mddev->chunk_sectors = conf->chunk_sectors; mddev->reshape_position = MaxSector; mddev->delta_disks = 0; + mddev->reshape_backwards = 0; } } diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 8c0a3adc5df5..07e05f92d050 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -281,10 +281,15 @@ struct mdp_superblock_1 { * active device with same 'role'. * 'recovery_offset' is also set. */ +#define MD_FEATURE_RESHAPE_BACKWARDS 32 /* Reshape doesn't change number + * of devices, but is going + * backwards anyway. + */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ |MD_FEATURE_BAD_BLOCKS \ - |MD_FEATURE_REPLACEMENT) + |MD_FEATURE_REPLACEMENT \ + |MD_FEATURE_RESHAPE_BACKWARDS) #endif -- cgit v1.2.3-59-g8ed1b From c6563a8c38fde3c1c7fc925a10bde3ca20799301 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 May 2012 09:27:00 +1000 Subject: md: add possibility to change data-offset for devices. When reshaping we can avoid costly intermediate backup by changing the 'start' address of the array on the device (if there is enough room). So as a first step, allow such a change to be requested through sysfs, and recorded in v1.x metadata. (As we didn't previous check that all 'pad' fields were zero, we need a new FEATURE flag for this. A (belatedly) check that all remaining 'pad' fields are zero to avoid a repeat of this) The new data offset must be requested separately for each device. This allows each to have a different change in the data offset. This is not likely to be used often but as data_offset can be set per-device, new_data_offset should be too. This patch also removes the 'acknowledged' arg to rdev_set_badblocks as it is never used and never will be. At the same time we add a new arg ('in_new') which is currently always zero but will be used more soon. When a reshape finishes we will need to update the data_offset and rdev->sectors. So provide an exported function to do that. 
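The per-device bookkeeping that exported function performs is small enough to show on its own. Below is a minimal user-space sketch (the struct is an illustrative stand-in, not the kernel's struct md_rdev) of the update applied once a reshape completes: the usable data size changes by exactly the distance the data offset moved, and the new offset becomes the current one, mirroring md_finish_reshape() in the diff below.

#include <stdint.h>
#include <stdio.h>

/* Illustrative model only; not the kernel's struct md_rdev. */
struct rdev_model {
	uint64_t data_offset;		/* current start of data (sectors) */
	uint64_t new_data_offset;	/* start of data after the reshape */
	uint64_t sectors;		/* usable data size (sectors) */
};

static void finish_reshape(struct rdev_model *rdev)
{
	/* Data moved towards the start of the device: space is gained;
	 * data moved away from the start: space is given up. */
	if (rdev->data_offset > rdev->new_data_offset)
		rdev->sectors += rdev->data_offset - rdev->new_data_offset;
	else
		rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
	rdev->data_offset = rdev->new_data_offset;
}

int main(void)
{
	/* Example: the data start moves forward by 2048 sectors (1 MiB),
	 * so this device gives up 2048 sectors of usable space. */
	struct rdev_model rdev = {
		.data_offset = 2048,
		.new_data_offset = 4096,
		.sectors = 1000000,
	};

	finish_reshape(&rdev);
	printf("data_offset=%llu sectors=%llu\n",
	       (unsigned long long)rdev.data_offset,
	       (unsigned long long)rdev.sectors);
	return 0;
}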
Signed-off-by: NeilBrown --- drivers/md/md.c | 217 +++++++++++++++++++++++++++++++++++++++++----- drivers/md/md.h | 7 +- drivers/md/raid1.c | 4 +- drivers/md/raid10.c | 8 +- drivers/md/raid5.c | 10 ++- include/linux/raid/md_p.h | 10 ++- 6 files changed, 222 insertions(+), 34 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 44bb1d52dd4c..9fa98fc74b05 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1035,12 +1035,17 @@ static unsigned int calc_sb_csum(mdp_super_t * sb) struct super_type { char *name; struct module *owner; - int (*load_super)(struct md_rdev *rdev, struct md_rdev *refdev, + int (*load_super)(struct md_rdev *rdev, + struct md_rdev *refdev, int minor_version); - int (*validate_super)(struct mddev *mddev, struct md_rdev *rdev); - void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); + int (*validate_super)(struct mddev *mddev, + struct md_rdev *rdev); + void (*sync_super)(struct mddev *mddev, + struct md_rdev *rdev); unsigned long long (*rdev_size_change)(struct md_rdev *rdev, sector_t num_sectors); + int (*allow_new_offset)(struct md_rdev *rdev, + unsigned long long new_offset); }; /* @@ -1112,6 +1117,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor rdev->preferred_minor = sb->md_minor; rdev->data_offset = 0; + rdev->new_data_offset = 0; rdev->sb_size = MD_SB_BYTES; rdev->badblocks.shift = -1; @@ -1438,6 +1444,12 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) return num_sectors; } +static int +super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset) +{ + /* non-zero offset changes not possible with v0.90 */ + return new_offset == 0; +} /* * version 1 superblock @@ -1473,6 +1485,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ struct mdp_superblock_1 *sb; int ret; sector_t sb_start; + sector_t sectors; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; int bmask; @@ -1527,9 +1540,18 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ bdevname(rdev->bdev,b)); return -EINVAL; } + if (sb->pad0 || + sb->pad3[0] || + memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1]))) + /* Some padding is non-zero, might be a new feature */ + return -EINVAL; rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); + rdev->new_data_offset = rdev->data_offset; + if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) && + (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET)) + rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset); atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; @@ -1540,6 +1562,9 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ if (minor_version && rdev->data_offset < sb_start + (rdev->sb_size/512)) return -EINVAL; + if (minor_version + && rdev->new_data_offset < sb_start + (rdev->sb_size/512)) + return -EINVAL; if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) rdev->desc_nr = -1; @@ -1611,16 +1636,14 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ else ret = 0; } - if (minor_version) - rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - - le64_to_cpu(sb->data_offset); - else - rdev->sectors = rdev->sb_start; - if (rdev->sectors < le64_to_cpu(sb->data_size)) + if (minor_version) { + sectors = (i_size_read(rdev->bdev->bd_inode) >> 9); + sectors -= rdev->data_offset; + } else + 
sectors = rdev->sb_start; + if (sectors < le64_to_cpu(sb->data_size)) return -EINVAL; rdev->sectors = le64_to_cpu(sb->data_size); - if (le64_to_cpu(sb->size) > rdev->sectors) - return -EINVAL; return ret; } @@ -1745,7 +1768,6 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->feature_map = 0; sb->pad0 = 0; sb->recovery_offset = cpu_to_le64(0); - memset(sb->pad1, 0, sizeof(sb->pad1)); memset(sb->pad3, 0, sizeof(sb->pad3)); sb->utime = cpu_to_le64((__u64)mddev->utime); @@ -1767,6 +1789,8 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->devflags |= WriteMostly1; else sb->devflags &= ~WriteMostly1; + sb->data_offset = cpu_to_le64(rdev->data_offset); + sb->data_size = cpu_to_le64(rdev->sectors); if (mddev->bitmap && mddev->bitmap_info.file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); @@ -1795,6 +1819,12 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) mddev->reshape_backwards) sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS); + if (rdev->new_data_offset != rdev->data_offset) { + sb->feature_map + |= cpu_to_le32(MD_FEATURE_NEW_OFFSET); + sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset + - rdev->data_offset)); + } } if (rdev->badblocks.count == 0) @@ -1871,6 +1901,8 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) sector_t max_sectors; if (num_sectors && num_sectors < rdev->mddev->dev_sectors) return 0; /* component must fit device */ + if (rdev->data_offset != rdev->new_data_offset) + return 0; /* too confusing */ if (rdev->sb_start < rdev->data_offset) { /* minor versions 1 and 2; superblock before data */ max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; @@ -1898,6 +1930,40 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) rdev->sb_page); md_super_wait(rdev->mddev); return num_sectors; + +} + +static int +super_1_allow_new_offset(struct md_rdev *rdev, + unsigned long long new_offset) +{ + /* All necessary checks on new >= old have been done */ + struct bitmap *bitmap; + if (new_offset >= rdev->data_offset) + return 1; + + /* with 1.0 metadata, there is no metadata to tread on + * so we can always move back */ + if (rdev->mddev->minor_version == 0) + return 1; + + /* otherwise we must be sure not to step on + * any metadata, so stay: + * 36K beyond start of superblock + * beyond end of badblocks + * beyond write-intent bitmap + */ + if (rdev->sb_start + (32+4)*2 > new_offset) + return 0; + bitmap = rdev->mddev->bitmap; + if (bitmap && !rdev->mddev->bitmap_info.file && + rdev->sb_start + rdev->mddev->bitmap_info.offset + + bitmap->file_pages * (PAGE_SIZE>>9) > new_offset) + return 0; + if (rdev->badblocks.sector + rdev->badblocks.size > new_offset) + return 0; + + return 1; } static struct super_type super_types[] = { @@ -1908,6 +1974,7 @@ static struct super_type super_types[] = { .validate_super = super_90_validate, .sync_super = super_90_sync, .rdev_size_change = super_90_rdev_size_change, + .allow_new_offset = super_90_allow_new_offset, }, [1] = { .name = "md-1", @@ -1916,6 +1983,7 @@ static struct super_type super_types[] = { .validate_super = super_1_validate, .sync_super = super_1_sync, .rdev_size_change = super_1_rdev_size_change, + .allow_new_offset = super_1_allow_new_offset, }, }; @@ -2823,9 +2891,8 @@ offset_show(struct md_rdev *rdev, char *page) static ssize_t offset_store(struct md_rdev *rdev, const char *buf, size_t len) { - char *e; - unsigned long long offset = simple_strtoull(buf, &e, 10); - if (e==buf 
|| (*e && *e != '\n')) + unsigned long long offset; + if (strict_strtoull(buf, 10, &offset) < 0) return -EINVAL; if (rdev->mddev->pers && rdev->raid_disk >= 0) return -EBUSY; @@ -2840,6 +2907,63 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len) static struct rdev_sysfs_entry rdev_offset = __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); +static ssize_t new_offset_show(struct md_rdev *rdev, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)rdev->new_data_offset); +} + +static ssize_t new_offset_store(struct md_rdev *rdev, + const char *buf, size_t len) +{ + unsigned long long new_offset; + struct mddev *mddev = rdev->mddev; + + if (strict_strtoull(buf, 10, &new_offset) < 0) + return -EINVAL; + + if (mddev->sync_thread) + return -EBUSY; + if (new_offset == rdev->data_offset) + /* reset is always permitted */ + ; + else if (new_offset > rdev->data_offset) { + /* must not push array size beyond rdev_sectors */ + if (new_offset - rdev->data_offset + + mddev->dev_sectors > rdev->sectors) + return -E2BIG; + } + /* Metadata worries about other space details. */ + + /* decreasing the offset is inconsistent with a backwards + * reshape. + */ + if (new_offset < rdev->data_offset && + mddev->reshape_backwards) + return -EINVAL; + /* Increasing offset is inconsistent with forwards + * reshape. reshape_direction should be set to + * 'backwards' first. + */ + if (new_offset > rdev->data_offset && + !mddev->reshape_backwards) + return -EINVAL; + + if (mddev->pers && mddev->persistent && + !super_types[mddev->major_version] + .allow_new_offset(rdev, new_offset)) + return -E2BIG; + rdev->new_data_offset = new_offset; + if (new_offset > rdev->data_offset) + mddev->reshape_backwards = 1; + else if (new_offset < rdev->data_offset) + mddev->reshape_backwards = 0; + + return len; +} +static struct rdev_sysfs_entry rdev_new_offset = +__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store); + static ssize_t rdev_size_show(struct md_rdev *rdev, char *page) { @@ -2884,6 +3008,8 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) if (strict_blocks_to_sectors(buf, §ors) < 0) return -EINVAL; + if (rdev->data_offset != rdev->new_data_offset) + return -EINVAL; /* too confusing */ if (my_mddev->pers && rdev->raid_disk >= 0) { if (my_mddev->persistent) { sectors = super_types[my_mddev->major_version]. 
@@ -3020,6 +3146,7 @@ static struct attribute *rdev_default_attrs[] = { &rdev_errors.attr, &rdev_slot.attr, &rdev_offset.attr, + &rdev_new_offset.attr, &rdev_size.attr, &rdev_recovery_start.attr, &rdev_bad_blocks.attr, @@ -3094,6 +3221,7 @@ int md_rdev_init(struct md_rdev *rdev) rdev->raid_disk = -1; rdev->flags = 0; rdev->data_offset = 0; + rdev->new_data_offset = 0; rdev->sb_events = 0; rdev->last_read_error.tv_sec = 0; rdev->last_read_error.tv_nsec = 0; @@ -3598,7 +3726,17 @@ raid_disks_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) rv = update_raid_disks(mddev, n); else if (mddev->reshape_position != MaxSector) { + struct md_rdev *rdev; int olddisks = mddev->raid_disks - mddev->delta_disks; + + rdev_for_each(rdev, mddev) { + if (olddisks < n && + rdev->data_offset < rdev->new_data_offset) + return -EINVAL; + if (olddisks > n && + rdev->data_offset > rdev->new_data_offset) + return -EINVAL; + } mddev->delta_disks = n - olddisks; mddev->raid_disks = n; mddev->reshape_backwards = (mddev->delta_disks < 0); @@ -4445,6 +4583,7 @@ reshape_position_show(struct mddev *mddev, char *page) static ssize_t reshape_position_store(struct mddev *mddev, const char *buf, size_t len) { + struct md_rdev *rdev; char *e; unsigned long long new = simple_strtoull(buf, &e, 10); if (mddev->pers) @@ -4457,6 +4596,8 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len) mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; mddev->new_chunk_sectors = mddev->chunk_sectors; + rdev_for_each(rdev, mddev) + rdev->new_data_offset = rdev->data_offset; return len; } @@ -6001,6 +6142,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) static int update_raid_disks(struct mddev *mddev, int raid_disks) { int rv; + struct md_rdev *rdev; /* change the number of raid disks */ if (mddev->pers->check_reshape == NULL) return -EINVAL; @@ -6009,6 +6151,16 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) return -EINVAL; if (mddev->sync_thread || mddev->reshape_position != MaxSector) return -EBUSY; + + rdev_for_each(rdev, mddev) { + if (mddev->raid_disks < raid_disks && + rdev->data_offset < rdev->new_data_offset) + return -EINVAL; + if (mddev->raid_disks > raid_disks && + rdev->data_offset > rdev->new_data_offset) + return -EINVAL; + } + mddev->delta_disks = raid_disks - mddev->raid_disks; if (mddev->delta_disks < 0) mddev->reshape_backwards = 1; @@ -7709,6 +7861,20 @@ void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) } EXPORT_SYMBOL(md_wait_for_blocked_rdev); +void md_finish_reshape(struct mddev *mddev) +{ + /* called be personality module when reshape completes. */ + struct md_rdev *rdev; + + rdev_for_each(rdev, mddev) { + if (rdev->data_offset > rdev->new_data_offset) + rdev->sectors += rdev->data_offset - rdev->new_data_offset; + else + rdev->sectors -= rdev->new_data_offset - rdev->data_offset; + rdev->data_offset = rdev->new_data_offset; + } +} +EXPORT_SYMBOL(md_finish_reshape); /* Bad block management. 
* We can record which blocks on each device are 'bad' and so just @@ -7957,10 +8123,15 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, } int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, - int acknowledged) + int is_new) { - int rv = md_set_badblocks(&rdev->badblocks, - s + rdev->data_offset, sectors, acknowledged); + int rv; + if (is_new) + s += rdev->new_data_offset; + else + s += rdev->data_offset; + rv = md_set_badblocks(&rdev->badblocks, + s, sectors, 0); if (rv) { /* Make sure they get written out promptly */ sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -8066,11 +8237,15 @@ out: return rv; } -int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors) +int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, + int is_new) { + if (is_new) + s += rdev->new_data_offset; + else + s += rdev->data_offset; return md_clear_badblocks(&rdev->badblocks, - s + rdev->data_offset, - sectors); + s, sectors); } EXPORT_SYMBOL_GPL(rdev_clear_badblocks); diff --git a/drivers/md/md.h b/drivers/md/md.h index d51c0ca37777..98913e8dac1a 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -55,6 +55,7 @@ struct md_rdev { int sb_loaded; __u64 sb_events; sector_t data_offset; /* start of data in array */ + sector_t new_data_offset;/* only relevant while reshaping */ sector_t sb_start; /* offset of the super block (in 512byte sectors) */ int sb_size; /* bytes in the superblock */ int preferred_minor; /* autorun support */ @@ -193,8 +194,9 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, return 0; } extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, - int acknowledged); -extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors); + int is_new); +extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, + int is_new); extern void md_ack_all_badblocks(struct badblocks *bb); struct mddev { @@ -592,6 +594,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi); extern void md_write_end(struct mddev *mddev); extern void md_done_sync(struct mddev *mddev, int blocks, int ok); extern void md_error(struct mddev *mddev, struct md_rdev *rdev); +extern void md_finish_reshape(struct mddev *mddev); extern int mddev_congested(struct mddev *mddev, int bits); extern void md_flush_request(struct mddev *mddev, struct bio *bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 15dd59b84e94..71a7dc038a82 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2024,7 +2024,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio continue; if (test_bit(BIO_UPTODATE, &bio->bi_flags) && test_bit(R1BIO_MadeGood, &r1_bio->state)) { - rdev_clear_badblocks(rdev, r1_bio->sector, s); + rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); } if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && test_bit(R1BIO_WriteError, &r1_bio->state)) { @@ -2044,7 +2044,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) struct md_rdev *rdev = conf->mirrors[m].rdev; rdev_clear_badblocks(rdev, r1_bio->sector, - r1_bio->sectors); + r1_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); } else if (r1_bio->bios[m] != NULL) { /* This drive got a write error. 
We need to diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3f91c2e1dfe7..832fb4d56657 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2480,7 +2480,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, - r10_bio->sectors); + r10_bio->sectors, 0); } else { if (!rdev_set_badblocks( rdev, @@ -2496,7 +2496,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, - r10_bio->sectors); + r10_bio->sectors, 0); } else { if (!rdev_set_badblocks( rdev, @@ -2515,7 +2515,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, - r10_bio->sectors); + r10_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); } else if (bio != NULL && !test_bit(BIO_UPTODATE, &bio->bi_flags)) { @@ -2532,7 +2532,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, - r10_bio->sectors); + r10_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0abbd3447cfb..3705585d7567 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3561,7 +3561,7 @@ finish: if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { rdev = conf->disks[i].rdev; rdev_clear_badblocks(rdev, sh->sector, - STRIPE_SECTORS); + STRIPE_SECTORS, 0); rdev_dec_pending(rdev, conf->mddev); } if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { @@ -3570,7 +3570,7 @@ finish: /* rdev have been moved down */ rdev = conf->disks[i].rdev; rdev_clear_badblocks(rdev, sh->sector, - STRIPE_SECTORS); + STRIPE_SECTORS, 0); rdev_dec_pending(rdev, conf->mddev); } } @@ -5505,10 +5505,14 @@ static int raid5_start_reshape(struct mddev *mddev) if (!check_stripe_cache(mddev)) return -ENOSPC; - rdev_for_each(rdev, mddev) + rdev_for_each(rdev, mddev) { + /* Don't support changing data_offset yet */ + if (rdev->new_data_offset != rdev->data_offset) + return -EINVAL; if (!test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags)) spares++; + } if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) /* Not enough devices even to make a degraded array diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 07e05f92d050..ee753536ab70 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -233,7 +233,10 @@ struct mdp_superblock_1 { __le32 delta_disks; /* change in number of raid_disks */ __le32 new_layout; /* new layout */ __le32 new_chunk; /* new chunk size (512byte sectors) */ - __u8 pad1[128-124]; /* set to 0 when written */ + __le32 new_offset; /* signed number to add to data_offset in new + * layout. 0 == no-change. This can be + * different on each device in the array. + */ /* constant this-device information - 64 bytes */ __le64 data_offset; /* sector start of data, often 0 */ @@ -285,11 +288,14 @@ struct mdp_superblock_1 { * of devices, but is going * backwards anyway. 
*/ +#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ |MD_FEATURE_BAD_BLOCKS \ |MD_FEATURE_REPLACEMENT \ - |MD_FEATURE_RESHAPE_BACKWARDS) + |MD_FEATURE_RESHAPE_BACKWARDS \ + |MD_FEATURE_NEW_OFFSET \ + ) #endif -- cgit v1.2.3-59-g8ed1b From 1fdd6fc92f330b81604c9a4f892f713775a9751a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 May 2012 09:28:32 +1000 Subject: md: teach sync_page_io about new_data_offset. Some code in raid1 and raid10 use sync_page_io to read/write pages when responding to read errors. As we will shortly support changing data_offset for raid10, this function must understand new_data_offset. So add that understanding. Signed-off-by: NeilBrown --- drivers/md/md.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9fa98fc74b05..c6b8d9fd0b04 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -888,6 +888,10 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, rdev->meta_bdev : rdev->bdev; if (metadata_op) bio->bi_sector = sector + rdev->sb_start; + else if (rdev->mddev->reshape_position != MaxSector && + (rdev->mddev->reshape_backwards == + (sector >= rdev->mddev->reshape_position))) + bio->bi_sector = sector + rdev->new_data_offset; else bio->bi_sector = sector + rdev->data_offset; bio_add_page(bio, page, size, 0); -- cgit v1.2.3-59-g8ed1b From c804cdecea418c067ee7359d62139b2b3c8cec39 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 May 2012 09:28:33 +1000 Subject: md: use resync_max_sectors for reshape as well as resync. Some resync type operations need to act on the address space of the device, others on the address space of the array. This only affects RAID10, so it sets resync_max_sectors to the array size (it defaults to the device size), and that is currently used for resync only. However reshape of a RAID10 must be done against the array size, not device size, so change code to use resync_max_sectors for both the resync and the reshape cases. This does not affect RAID5 or RAID1, just RAID10. 
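The change reduces to one selection rule, applied at the three call sites in the diff below: a resync or a reshape must cover the array-wide resync_max_sectors, while plain recovery still follows the physical size of the devices. A condensed user-space sketch (the flag masks and struct are illustrative, not the kernel's recovery bit numbers or struct mddev):

#include <stdint.h>
#include <stdio.h>

#define MD_RECOVERY_SYNC	(1u << 0)	/* resync/check/repair */
#define MD_RECOVERY_RESHAPE	(1u << 1)	/* layout or size change */

/* Illustrative model only; not the kernel's struct mddev. */
struct mddev_model {
	unsigned int recovery;		/* recovery state flags */
	uint64_t dev_sectors;		/* per-device data size */
	uint64_t resync_max_sectors;	/* array-wide size (differs on RAID10) */
};

/* Resync and, after this patch, reshape walk the array's address space;
 * recovery of a replaced device walks the device's. */
static uint64_t sync_span(const struct mddev_model *m)
{
	if (m->recovery & (MD_RECOVERY_SYNC | MD_RECOVERY_RESHAPE))
		return m->resync_max_sectors;
	return m->dev_sectors;
}

int main(void)
{
	struct mddev_model m = {
		.recovery = MD_RECOVERY_RESHAPE,
		.dev_sectors = 1000000,
		.resync_max_sectors = 2000000,
	};

	printf("sectors to process: %llu\n",
	       (unsigned long long)sync_span(&m));
	return 0;
}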
Signed-off-by: NeilBrown --- drivers/md/md.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index c6b8d9fd0b04..8fe1abf1b89c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4425,7 +4425,8 @@ sync_completed_show(struct mddev *mddev, char *page) if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return sprintf(page, "none\n"); - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || + test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) max_sectors = mddev->resync_max_sectors; else max_sectors = mddev->dev_sectors; @@ -6803,7 +6804,8 @@ static void status_resync(struct seq_file *seq, struct mddev * mddev) resync = mddev->curr_resync - atomic_read(&mddev->recovery_active); - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || + test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) max_sectors = mddev->resync_max_sectors; else max_sectors = mddev->dev_sectors; @@ -7366,7 +7368,7 @@ void md_do_sync(struct mddev *mddev) j = mddev->recovery_cp; } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - max_sectors = mddev->dev_sectors; + max_sectors = mddev->resync_max_sectors; else { /* recovery follows the physical size of devices */ max_sectors = mddev->dev_sectors; -- cgit v1.2.3-59-g8ed1b From 545c87957f4d53867b62921625f36df8c4b1bc08 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 May 2012 13:54:30 +1000 Subject: md: dm-raid should call helper function to clear rdev. dm-raid currently open-codes the freeing of some members of and rdev. It is more maintainable to have it call common code from md.c which does this for all call-sites. So remove free_disk_sb to md_rdev_clear, export it, and use it in dm-raid.c Signed-off-by: NeilBrown --- drivers/md/dm-raid.c | 5 +---- drivers/md/md.c | 8 ++++---- drivers/md/md.h | 1 + 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 68965e663248..73a068da10d9 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -155,10 +155,7 @@ static void context_free(struct raid_set *rs) for (i = 0; i < rs->md.raid_disks; i++) { if (rs->dev[i].meta_dev) dm_put_device(rs->ti, rs->dev[i].meta_dev); - if (rs->dev[i].rdev.sb_page) - put_page(rs->dev[i].rdev.sb_page); - rs->dev[i].rdev.sb_page = NULL; - rs->dev[i].rdev.sb_loaded = 0; + md_rdev_clear(&rs->dev[i].rdev); if (rs->dev[i].data_dev) dm_put_device(rs->ti, rs->dev[i].data_dev); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 8fe1abf1b89c..d557e557ff8f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -803,7 +803,7 @@ static int alloc_disk_sb(struct md_rdev * rdev) return 0; } -static void free_disk_sb(struct md_rdev * rdev) +void md_rdev_clear(struct md_rdev *rdev) { if (rdev->sb_page) { put_page(rdev->sb_page); @@ -817,7 +817,7 @@ static void free_disk_sb(struct md_rdev * rdev) rdev->bb_page = NULL; } } - +EXPORT_SYMBOL_GPL(md_rdev_clear); static void super_written(struct bio *bio, int error) { @@ -2244,7 +2244,7 @@ static void export_rdev(struct md_rdev * rdev) bdevname(rdev->bdev,b)); if (rdev->mddev) MD_BUG(); - free_disk_sb(rdev); + md_rdev_clear(rdev); #ifndef MODULE if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); @@ -3324,7 +3324,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe abort_free: if (rdev->bdev) unlock_rdev(rdev); - 
free_disk_sb(rdev); + md_rdev_clear(rdev); kfree(rdev->badblocks.page); kfree(rdev); return ERR_PTR(err); diff --git a/drivers/md/md.h b/drivers/md/md.h index 98913e8dac1a..360937389e64 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -619,6 +619,7 @@ extern int md_run(struct mddev *mddev); extern void md_stop(struct mddev *mddev); extern void md_stop_writes(struct mddev *mddev); extern int md_rdev_init(struct md_rdev *rdev); +extern void md_rdev_clear(struct md_rdev *rdev); extern void mddev_suspend(struct mddev *mddev); extern void mddev_resume(struct mddev *mddev); -- cgit v1.2.3-59-g8ed1b From 4fa2f327681808f653711e14203a42cf4644bda0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 May 2012 13:55:01 +1000 Subject: md: move freeing of badblocks.page into md_rdev_clear This ensures that it is always freed - there were case where we failed to free the page. Reported-by: majianpeng Signed-off-by: NeilBrown --- drivers/md/md.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index d557e557ff8f..ac99616f48d4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -816,6 +816,8 @@ void md_rdev_clear(struct md_rdev *rdev) put_page(rdev->bb_page); rdev->bb_page = NULL; } + kfree(rdev->badblocks.page); + rdev->badblocks.page = NULL; } EXPORT_SYMBOL_GPL(md_rdev_clear); @@ -2191,9 +2193,7 @@ static void unbind_rdev_from_array(struct md_rdev * rdev) sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); rdev->sysfs_state = NULL; - kfree(rdev->badblocks.page); rdev->badblocks.count = 0; - rdev->badblocks.page = NULL; /* We need to delay this, otherwise we can deadlock when * writing to 'remove' to "dev/state". We also need * to delay it due to rcu usage. @@ -3325,7 +3325,6 @@ abort_free: if (rdev->bdev) unlock_rdev(rdev); md_rdev_clear(rdev); - kfree(rdev->badblocks.page); kfree(rdev); return ERR_PTR(err); } -- cgit v1.2.3-59-g8ed1b From 6409bb05a9831f6af36a20b97cda13059c2ef1b6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 May 2012 13:55:07 +1000 Subject: md/bitmap: add new 'space' attribute for bitmaps. If we are to allow bitmaps to be resized when the array is resized, we need to know how much space there is. So create an attribute to store this information and set appropriate defaults. It can be set more precisely via sysfs, or future metadata extensions may allow it to be recorded. Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 39 +++++++++++++++++++++++++++++++++++++++ drivers/md/md.c | 33 +++++++++++++++++++++++++++++++-- drivers/md/md.h | 3 +++ 3 files changed, 73 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index c7784a985676..ac688fb54e1d 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1934,6 +1934,44 @@ location_store(struct mddev *mddev, const char *buf, size_t len) static struct md_sysfs_entry bitmap_location = __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); +/* 'bitmap/space' is the space available at 'location' for the + * bitmap. This allows the kernel to know when it is safe to + * resize the bitmap to match a resized array. 
+ */ +static ssize_t +space_show(struct mddev *mddev, char *page) +{ + return sprintf(page, "%lu\n", mddev->bitmap_info.space); +} + +static ssize_t +space_store(struct mddev *mddev, const char *buf, size_t len) +{ + unsigned long sectors; + int rv; + + rv = kstrtoul(buf, 10, §ors); + if (rv) + return rv; + + if (sectors == 0) + return -EINVAL; + + if (mddev->bitmap && + sectors < ((mddev->bitmap->file_pages - 1) * PAGE_SIZE + + mddev->bitmap->last_page_size + 511) >> 9) + return -EFBIG; /* Bitmap is too big for this small space */ + + /* could make sure it isn't too big, but that isn't really + * needed - user-space should be careful. + */ + mddev->bitmap_info.space = sectors; + return len; +} + +static struct md_sysfs_entry bitmap_space = +__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); + static ssize_t timeout_show(struct mddev *mddev, char *page) { @@ -2109,6 +2147,7 @@ __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, static struct attribute *md_bitmap_attrs[] = { &bitmap_location.attr, + &bitmap_space.attr, &bitmap_timeout.attr, &bitmap_backlog.attr, &bitmap_chunksize.attr, diff --git a/drivers/md/md.c b/drivers/md/md.c index ac99616f48d4..9a677f2078a7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1197,7 +1197,10 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->dev_sectors = ((sector_t)sb->size) * 2; mddev->events = ev1; mddev->bitmap_info.offset = 0; + mddev->bitmap_info.space = 0; + /* bitmap can use 60 K after the 4K superblocks */ mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; + mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9); mddev->reshape_backwards = 0; if (mddev->minor_version >= 91) { @@ -1234,9 +1237,12 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->max_disks = MD_SB_DISKS; if (sb->state & (1<bitmap_info.file == NULL) + mddev->bitmap_info.file == NULL) { mddev->bitmap_info.offset = mddev->bitmap_info.default_offset; + mddev->bitmap_info.space = + mddev->bitmap_info.space; + } } else if (mddev->pers == NULL) { /* Insist on good event counter while assembling, except @@ -1677,7 +1683,12 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->dev_sectors = le64_to_cpu(sb->size); mddev->events = ev1; mddev->bitmap_info.offset = 0; + mddev->bitmap_info.space = 0; + /* Default location for bitmap is 1K after superblock + * using 3K - total of 4K + */ mddev->bitmap_info.default_offset = 1024 >> 9; + mddev->bitmap_info.default_space = (4096-1024) >> 9; mddev->reshape_backwards = 0; mddev->recovery_cp = le64_to_cpu(sb->resync_offset); @@ -1686,9 +1697,23 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->max_disks = (4096-256)/2; if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && - mddev->bitmap_info.file == NULL ) + mddev->bitmap_info.file == NULL) { mddev->bitmap_info.offset = (__s32)le32_to_cpu(sb->bitmap_offset); + /* Metadata doesn't record how much space is available. + * For 1.0, we assume we can use up to the superblock + * if before, else to 4K beyond superblock. + * For others, assume no change is possible. 
+ */ + if (mddev->minor_version > 0) + mddev->bitmap_info.space = 0; + else if (mddev->bitmap_info.offset > 0) + mddev->bitmap_info.space = + 8 - mddev->bitmap_info.offset; + else + mddev->bitmap_info.space = + -mddev->bitmap_info.offset; + } if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { mddev->reshape_position = le64_to_cpu(sb->reshape_position); @@ -5280,6 +5305,7 @@ static void md_clean(struct mddev *mddev) mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; + mddev->bitmap_info.default_space = 0; mddev->bitmap_info.chunksize = 0; mddev->bitmap_info.daemon_sleep = 0; mddev->bitmap_info.max_write_behind = 0; @@ -6076,6 +6102,7 @@ static int set_array_info(struct mddev * mddev, mdu_array_info_t *info) set_bit(MD_CHANGE_DEVS, &mddev->flags); mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; + mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9); mddev->bitmap_info.offset = 0; mddev->reshape_position = MaxSector; @@ -6258,6 +6285,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) return -EINVAL; mddev->bitmap_info.offset = mddev->bitmap_info.default_offset; + mddev->bitmap_info.space = + mddev->bitmap_info.default_space; mddev->pers->quiesce(mddev, 1); rv = bitmap_create(mddev); if (!rv) diff --git a/drivers/md/md.h b/drivers/md/md.h index 360937389e64..7b4a3c318cae 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -393,10 +393,13 @@ struct mddev { * For external metadata, offset * from start of device. */ + unsigned long space; /* space available at this offset */ loff_t default_offset; /* this is the offset to use when * hot-adding a bitmap. It should * eventually be settable by sysfs. */ + unsigned long default_space; /* space available at + * default offset */ struct mutex mutex; unsigned long chunksize; unsigned long daemon_sleep; /* how many jiffies between updates? */ -- cgit v1.2.3-59-g8ed1b From ef99bf480de9bde9d3b2afdf05324670fab4e571 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 May 2012 13:55:08 +1000 Subject: md/bitmap: allow a bitmap with no backing storage. An md bitmap comprises two parts - internal counting of active writes per 'chunk'. - external storage of whether there are any active writes on each chunk The second requires the first, but the first doesn't require the second. Not having backing storage means that the bitmap cannot expedite resync after a crash, but it still allows us to expedite the recovery of a recently-removed device. So: allow a bitmap to exist even if there is no backing device. In that case we default to 128M chunks. A particular value of this is that we can remove and re-add a bitmap (possibly of a different granularity) on a degraded array, and not lose the information needed to fast-recover the missing device. We don't actually activate these bitmaps yet - that will come in a later patch. 
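To make the 'two parts' described above concrete, here is a small user-space model (every type and name below is illustrative only, not the kernel's bitmap structures) in which the per-chunk write counters always exist while the external storage is optional:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative model only; not the kernel's struct bitmap. */
struct bitmap_model {
	uint16_t *counters;	/* internal: active writes per chunk, always allocated */
	uint8_t *storage;	/* external: dirty bit per chunk, NULL if no backing store */
	unsigned long chunks;
};

/* Start of a write to 'chunk': bump the counter, and only mark the
 * external store dirty if there is one to mark. */
static void start_write(struct bitmap_model *b, unsigned long chunk)
{
	b->counters[chunk]++;
	if (b->storage)
		b->storage[chunk] = 1;
}

/* End of a write: the counters alone are enough to track activity;
 * external bits are cleared lazily, if they exist at all. */
static void end_write(struct bitmap_model *b, unsigned long chunk)
{
	if (b->counters[chunk])
		b->counters[chunk]--;
}

int main(void)
{
	/* A bitmap with no backing storage, as this patch now allows;
	 * the kernel defaults such a bitmap to 128MB chunks. */
	struct bitmap_model b = { .chunks = 8, .storage = NULL };

	b.counters = calloc(b.chunks, sizeof(*b.counters));
	if (!b.counters)
		return 1;

	start_write(&b, 3);
	printf("chunk 3 active writes: %u\n", (unsigned)b.counters[3]);
	end_write(&b, 3);
	free(b.counters);
	return 0;
}

With storage left NULL the in-memory state still records which chunks changed, which matches the benefit described above: a recently re-added device only needs recovery on the chunks that were written while it was missing.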
Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 136 +++++++++++++++++++++++++++++----------------------- drivers/md/md.c | 5 +- 2 files changed, 79 insertions(+), 62 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index ac688fb54e1d..c042efd019c3 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -553,6 +553,14 @@ static int bitmap_read_sb(struct bitmap *bitmap) unsigned long long events; int err = -EINVAL; + if (!bitmap->file && !bitmap->mddev->bitmap_info.offset) { + chunksize = 128 * 1024 * 1024; + daemon_sleep = 5 * HZ; + write_behind = 0; + bitmap->flags = BITMAP_STALE; + err = 0; + goto out_no_sb; + } /* page 0 is the superblock, read it... */ if (bitmap->file) { loff_t isize = i_size_read(bitmap->file->f_mapping->host); @@ -623,18 +631,19 @@ static int bitmap_read_sb(struct bitmap *bitmap) } /* assign fields using values from superblock */ - bitmap->mddev->bitmap_info.chunksize = chunksize; - bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; - bitmap->mddev->bitmap_info.max_write_behind = write_behind; bitmap->flags |= le32_to_cpu(sb->state); if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) bitmap->flags |= BITMAP_HOSTENDIAN; bitmap->events_cleared = le64_to_cpu(sb->events_cleared); - if (bitmap->flags & BITMAP_STALE) - bitmap->events_cleared = bitmap->mddev->events; err = 0; out: kunmap_atomic(sb); +out_no_sb: + if (bitmap->flags & BITMAP_STALE) + bitmap->events_cleared = bitmap->mddev->events; + bitmap->mddev->bitmap_info.chunksize = chunksize; + bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; + bitmap->mddev->bitmap_info.max_write_behind = write_behind; if (err) bitmap_print_sb(bitmap); return err; @@ -837,9 +846,6 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) void *kaddr; unsigned long chunk = block >> bitmap->chunkshift; - if (!bitmap->filemap) - return; - page = filemap_get_page(bitmap, chunk); if (!page) return; @@ -857,6 +863,29 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); } +static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) +{ + unsigned long bit; + struct page *page; + void *paddr; + unsigned long chunk = block >> bitmap->chunkshift; + + page = filemap_get_page(bitmap, chunk); + if (!page) + return; + bit = file_page_offset(bitmap, chunk); + paddr = kmap_atomic(page); + if (bitmap->flags & BITMAP_HOSTENDIAN) + clear_bit(bit, paddr); + else + __clear_bit_le(bit, paddr); + kunmap_atomic(paddr); + if (!test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE)) { + set_page_attr(bitmap, page, BITMAP_PAGE_PENDING); + bitmap->allclean = 0; + } +} + /* this gets called when the md device is ready to unplug its underlying * (slave) device queues -- before we let any writes go down, we need to * sync the dirty pages of the bitmap file to disk */ @@ -867,7 +896,7 @@ void bitmap_unplug(struct bitmap *bitmap) struct page *page; int wait = 0; - if (!bitmap) + if (!bitmap || !bitmap->filemap) return; /* look at each page to see if there are any set bits that need to be @@ -930,7 +959,20 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) chunks = bitmap->chunks; file = bitmap->file; - BUG_ON(!file && !bitmap->mddev->bitmap_info.offset); + if (!file && !bitmap->mddev->bitmap_info.offset) { + /* No permanent bitmap - fill with '1s'. 
*/ + bitmap->filemap = NULL; + bitmap->file_pages = 0; + for (i = 0; i < chunks ; i++) { + /* if the disk bit is set, set the memory bit */ + int needed = ((sector_t)(i+1) << (bitmap->chunkshift) + >= start); + bitmap_set_memory_bits(bitmap, + (sector_t)i << bitmap->chunkshift, + needed); + } + return 0; + } outofdate = bitmap->flags & BITMAP_STALE; if (outofdate) @@ -1045,15 +1087,6 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) } } - /* everything went OK */ - ret = 0; - bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); - - if (bit_cnt) { /* Kick recovery if any bits were set */ - set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); - md_wakeup_thread(bitmap->mddev->thread); - } - printk(KERN_INFO "%s: bitmap initialized from disk: " "read %lu/%lu pages, set %lu of %lu bits\n", bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, chunks); @@ -1073,6 +1106,12 @@ void bitmap_write_all(struct bitmap *bitmap) */ int i; + if (!bitmap || !bitmap->filemap) + return; + if (bitmap->file) + /* Only one copy, so nothing needed */ + return; + spin_lock_irq(&bitmap->lock); for (i = 0; i < bitmap->file_pages; i++) set_page_attr(bitmap, bitmap->filemap[i], @@ -1115,7 +1154,6 @@ void bitmap_daemon_work(struct mddev *mddev) unsigned long nextpage; unsigned long flags; sector_t blocks; - void *paddr; /* Use a mutex to guard daemon_work against * bitmap_destroy. @@ -1142,10 +1180,6 @@ void bitmap_daemon_work(struct mddev *mddev) * we will write it. */ spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->filemap) - /* error or shutdown */ - goto out; - for (j = 0; j < bitmap->file_pages; j++) if (test_page_attr(bitmap, bitmap->filemap[j], BITMAP_PAGE_PENDING)) { @@ -1161,11 +1195,14 @@ void bitmap_daemon_work(struct mddev *mddev) * other changes */ bitmap_super_t *sb; bitmap->need_sync = 0; - sb = kmap_atomic(bitmap->sb_page); - sb->events_cleared = - cpu_to_le64(bitmap->events_cleared); - kunmap_atomic(sb); - set_page_attr(bitmap, bitmap->sb_page, BITMAP_PAGE_NEEDWRITE); + if (bitmap->filemap) { + sb = kmap_atomic(bitmap->sb_page); + sb->events_cleared = + cpu_to_le64(bitmap->events_cleared); + kunmap_atomic(sb); + set_page_attr(bitmap, bitmap->sb_page, + BITMAP_PAGE_NEEDWRITE); + } } /* Now look at the bitmap counters and if any are '2' or '1', * decrement and handle accordingly. 
@@ -1173,6 +1210,7 @@ void bitmap_daemon_work(struct mddev *mddev) nextpage = 0; for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; + sector_t block = (sector_t)j << bitmap->chunkshift; if (j == nextpage) { nextpage += PAGE_COUNTER_RATIO; @@ -1183,7 +1221,7 @@ void bitmap_daemon_work(struct mddev *mddev) bitmap->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; } bmc = bitmap_get_counter(bitmap, - (sector_t)j << bitmap->chunkshift, + block, &blocks, 0); if (!bmc) { @@ -1192,33 +1230,12 @@ void bitmap_daemon_work(struct mddev *mddev) } if (*bmc == 1 && !bitmap->need_sync) { /* We can clear the bit */ - struct page *page; *bmc = 0; - bitmap_count_page( - bitmap, - (sector_t)j << bitmap->chunkshift, - -1); - - page = filemap_get_page(bitmap, j); - paddr = kmap_atomic(page); - if (bitmap->flags & BITMAP_HOSTENDIAN) - clear_bit(file_page_offset(bitmap, j), - paddr); - else - __clear_bit_le(file_page_offset(bitmap, j), - paddr); - kunmap_atomic(paddr); - if (!test_page_attr(bitmap, page, - BITMAP_PAGE_NEEDWRITE)) { - set_page_attr(bitmap, page, - BITMAP_PAGE_PENDING); - bitmap->allclean = 0; - } + bitmap_count_page(bitmap, block, -1); + bitmap_file_clear_bit(bitmap, block); } else if (*bmc && *bmc <= 2) { *bmc = 1; - bitmap_set_pending( - bitmap, - (sector_t)j << bitmap->chunkshift); + bitmap_set_pending(bitmap, block); bitmap->allclean = 0; } } @@ -1249,7 +1266,6 @@ void bitmap_daemon_work(struct mddev *mddev) break; } } -out: spin_unlock_irqrestore(&bitmap->lock, flags); done: @@ -1551,7 +1567,7 @@ EXPORT_SYMBOL(bitmap_cond_end_sync); static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) { /* For each chunk covered by any of these sectors, set the - * counter to 1 and set resync_needed. They should all + * counter to 2 and possibly set resync_needed. They should all * be 0 at this point */ @@ -1678,10 +1694,6 @@ int bitmap_create(struct mddev *mddev) BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); - if (!file - && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ - return 0; - BUG_ON(file && mddev->bitmap_info.offset); bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); @@ -1801,6 +1813,10 @@ int bitmap_load(struct mddev *mddev) if (err) goto out; + bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); + + /* Kick recovery in case any bits were set */ + set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; md_wakeup_thread(mddev->thread); diff --git a/drivers/md/md.c b/drivers/md/md.c index 9a677f2078a7..607771bb7e92 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5139,7 +5139,8 @@ int md_run(struct mddev *mddev) err = -EINVAL; mddev->pers->stop(mddev); } - if (err == 0 && mddev->pers->sync_request) { + if (err == 0 && mddev->pers->sync_request && + (mddev->bitmap_info.file || mddev->bitmap_info.offset)) { err = bitmap_create(mddev); if (err) { printk(KERN_ERR "%s: failed to create bitmap (%d)\n", @@ -7847,7 +7848,7 @@ void md_check_recovery(struct mddev *mddev) goto unlock; if (mddev->pers->sync_request) { - if (spares && mddev->bitmap && ! mddev->bitmap->file) { + if (spares) { /* We are adding a device or devices to an array * which has the bitmap stored on all devices. * So make sure all bitmap pages get written -- cgit v1.2.3-59-g8ed1b From 1ec885cdd01a9ad867dbb9fd32a1bfcc0875c486 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 May 2012 13:55:10 +1000 Subject: md/bitmap: move some fields of 'struct bitmap' into a 'storage' substruct. 
This new 'struct bitmap_storage' reflects the external storage of the bitmap. Having this clearly defined will make it easier to change the storage used while the array is active. Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 180 +++++++++++++++++++++++++++------------------------- drivers/md/bitmap.h | 17 +++-- drivers/md/md.c | 9 +-- 3 files changed, 110 insertions(+), 96 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 511f0ed5a5db..c98f2fee1bd0 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -195,6 +195,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) struct md_rdev *rdev = NULL; struct block_device *bdev; struct mddev *mddev = bitmap->mddev; + struct bitmap_storage *store = &bitmap->storage; while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { int size = PAGE_SIZE; @@ -202,8 +203,8 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; - if (page->index == bitmap->file_pages-1) - size = roundup(bitmap->last_page_size, + if (page->index == store->file_pages-1) + size = roundup(store->last_page_size, bdev_logical_block_size(bdev)); /* Just make sure we aren't corrupting data or * metadata @@ -263,7 +264,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) { struct buffer_head *bh; - if (bitmap->file == NULL) { + if (bitmap->storage.file == NULL) { switch (write_sb_page(bitmap, page, wait)) { case -EINVAL: bitmap->flags |= BITMAP_WRITE_ERROR; @@ -408,9 +409,9 @@ void bitmap_update_sb(struct bitmap *bitmap) return; if (bitmap->mddev->bitmap_info.external) return; - if (!bitmap->sb_page) /* no superblock */ + if (!bitmap->storage.sb_page) /* no superblock */ return; - sb = kmap_atomic(bitmap->sb_page); + sb = kmap_atomic(bitmap->storage.sb_page); sb->events = cpu_to_le64(bitmap->mddev->events); if (bitmap->mddev->events < bitmap->events_cleared) /* rocking back to read-only */ @@ -421,7 +422,7 @@ void bitmap_update_sb(struct bitmap *bitmap) sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); kunmap_atomic(sb); - write_page(bitmap, bitmap->sb_page, 1); + write_page(bitmap, bitmap->storage.sb_page, 1); } /* print out the bitmap file superblock */ @@ -429,9 +430,9 @@ void bitmap_print_sb(struct bitmap *bitmap) { bitmap_super_t *sb; - if (!bitmap || !bitmap->sb_page) + if (!bitmap || !bitmap->storage.sb_page) return; - sb = kmap_atomic(bitmap->sb_page); + sb = kmap_atomic(bitmap->storage.sb_page); printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap)); printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic)); printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version)); @@ -470,15 +471,15 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) unsigned long chunksize, daemon_sleep, write_behind; int err = -EINVAL; - bitmap->sb_page = alloc_page(GFP_KERNEL); - if (IS_ERR(bitmap->sb_page)) { - err = PTR_ERR(bitmap->sb_page); - bitmap->sb_page = NULL; + bitmap->storage.sb_page = alloc_page(GFP_KERNEL); + if (IS_ERR(bitmap->storage.sb_page)) { + err = PTR_ERR(bitmap->storage.sb_page); + bitmap->storage.sb_page = NULL; return err; } - bitmap->sb_page->index = 0; + bitmap->storage.sb_page->index = 0; - sb = kmap_atomic(bitmap->sb_page); + sb = kmap_atomic(bitmap->storage.sb_page); sb->magic = cpu_to_le32(BITMAP_MAGIC); sb->version = cpu_to_le32(BITMAP_MAJOR_HI); @@ 
-536,7 +537,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) int err = -EINVAL; struct page *sb_page; - if (!bitmap->file && !bitmap->mddev->bitmap_info.offset) { + if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { chunksize = 128 * 1024 * 1024; daemon_sleep = 5 * HZ; write_behind = 0; @@ -548,13 +549,13 @@ static int bitmap_read_sb(struct bitmap *bitmap) sb_page = alloc_page(GFP_KERNEL); if (!sb_page) return -ENOMEM; - bitmap->sb_page = sb_page; + bitmap->storage.sb_page = sb_page; - if (bitmap->file) { - loff_t isize = i_size_read(bitmap->file->f_mapping->host); + if (bitmap->storage.file) { + loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; - err = read_page(bitmap->file, 0, + err = read_page(bitmap->storage.file, 0, bitmap, bytes, sb_page); } else { err = read_sb_page(bitmap->mddev, @@ -647,9 +648,9 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, bitmap_super_t *sb; int old; - if (!bitmap->sb_page) /* can't set the state */ + if (!bitmap->storage.sb_page) /* can't set the state */ return 0; - sb = kmap_atomic(bitmap->sb_page); + sb = kmap_atomic(bitmap->storage.sb_page); old = le32_to_cpu(sb->state) & bits; switch (op) { case MASK_SET: @@ -678,17 +679,19 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, * file a page at a time. There's a superblock at the start of the file. */ /* calculate the index of the page that contains this bit */ -static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk) +static inline unsigned long file_page_index(struct bitmap_storage *store, + unsigned long chunk) { - if (!bitmap->mddev->bitmap_info.external) + if (store->sb_page) chunk += sizeof(bitmap_super_t) << 3; return chunk >> PAGE_BIT_SHIFT; } /* calculate the (bit) offset of this bit within a page */ -static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk) +static inline unsigned long file_page_offset(struct bitmap_storage *store, + unsigned long chunk) { - if (!bitmap->mddev->bitmap_info.external) + if (store->sb_page) chunk += sizeof(bitmap_super_t) << 3; return chunk & (PAGE_BITS - 1); } @@ -700,13 +703,13 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page * 0 or page 1 */ -static inline struct page *filemap_get_page(struct bitmap *bitmap, +static inline struct page *filemap_get_page(struct bitmap_storage *store, unsigned long chunk) { - if (file_page_index(bitmap, chunk) >= bitmap->file_pages) + if (file_page_index(store, chunk) >= store->file_pages) return NULL; - return bitmap->filemap[file_page_index(bitmap, chunk) - - file_page_index(bitmap, 0)]; + return store->filemap[file_page_index(store, chunk) + - file_page_index(store, 0)]; } static void bitmap_file_unmap(struct bitmap *bitmap) @@ -715,16 +718,17 @@ static void bitmap_file_unmap(struct bitmap *bitmap) unsigned long *attr; int pages; unsigned long flags; + struct bitmap_storage *store = &bitmap->storage; spin_lock_irqsave(&bitmap->lock, flags); - map = bitmap->filemap; - bitmap->filemap = NULL; - attr = bitmap->filemap_attr; - bitmap->filemap_attr = NULL; - pages = bitmap->file_pages; - bitmap->file_pages = 0; - sb_page = bitmap->sb_page; - bitmap->sb_page = NULL; + map = store->filemap; + store->filemap = NULL; + attr = store->filemap_attr; + store->filemap_attr = NULL; + pages = store->file_pages; + 
store->file_pages = 0; + sb_page = store->sb_page; + store->sb_page = NULL; spin_unlock_irqrestore(&bitmap->lock, flags); while (pages--) @@ -743,8 +747,8 @@ static void bitmap_file_put(struct bitmap *bitmap) unsigned long flags; spin_lock_irqsave(&bitmap->lock, flags); - file = bitmap->file; - bitmap->file = NULL; + file = bitmap->storage.file; + bitmap->storage.file = NULL; spin_unlock_irqrestore(&bitmap->lock, flags); if (file) @@ -771,11 +775,11 @@ static void bitmap_file_kick(struct bitmap *bitmap) if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) { bitmap_update_sb(bitmap); - if (bitmap->file) { + if (bitmap->storage.file) { path = kmalloc(PAGE_SIZE, GFP_KERNEL); if (path) - ptr = d_path(&bitmap->file->f_path, path, - PAGE_SIZE); + ptr = d_path(&bitmap->storage.file->f_path, + path, PAGE_SIZE); printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n", @@ -803,19 +807,19 @@ enum bitmap_page_attr { static inline void set_page_attr(struct bitmap *bitmap, int pnum, enum bitmap_page_attr attr) { - __set_bit((pnum<<2) + attr, bitmap->filemap_attr); + __set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } static inline void clear_page_attr(struct bitmap *bitmap, int pnum, enum bitmap_page_attr attr) { - __clear_bit((pnum<<2) + attr, bitmap->filemap_attr); + __clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } static inline unsigned long test_page_attr(struct bitmap *bitmap, int pnum, enum bitmap_page_attr attr) { - return test_bit((pnum<<2) + attr, bitmap->filemap_attr); + return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } /* @@ -832,10 +836,10 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) void *kaddr; unsigned long chunk = block >> bitmap->chunkshift; - page = filemap_get_page(bitmap, chunk); + page = filemap_get_page(&bitmap->storage, chunk); if (!page) return; - bit = file_page_offset(bitmap, chunk); + bit = file_page_offset(&bitmap->storage, chunk); /* set the bit */ kaddr = kmap_atomic(page); @@ -856,10 +860,10 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) void *paddr; unsigned long chunk = block >> bitmap->chunkshift; - page = filemap_get_page(bitmap, chunk); + page = filemap_get_page(&bitmap->storage, chunk); if (!page) return; - bit = file_page_offset(bitmap, chunk); + bit = file_page_offset(&bitmap->storage, chunk); paddr = kmap_atomic(page); if (bitmap->flags & BITMAP_HOSTENDIAN) clear_bit(bit, paddr); @@ -881,14 +885,14 @@ void bitmap_unplug(struct bitmap *bitmap) int dirty, need_write; int wait = 0; - if (!bitmap || !bitmap->filemap) + if (!bitmap || !bitmap->storage.filemap) return; /* look at each page to see if there are any set bits that need to be * flushed out to disk */ - for (i = 0; i < bitmap->file_pages; i++) { + for (i = 0; i < bitmap->storage.file_pages; i++) { spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->filemap) { + if (!bitmap->storage.filemap) { spin_unlock_irqrestore(&bitmap->lock, flags); return; } @@ -903,10 +907,10 @@ void bitmap_unplug(struct bitmap *bitmap) spin_unlock_irqrestore(&bitmap->lock, flags); if (dirty || need_write) - write_page(bitmap, bitmap->filemap[i], 0); + write_page(bitmap, bitmap->storage.filemap[i], 0); } if (wait) { /* if any writes were performed, we need to wait on them */ - if (bitmap->file) + if (bitmap->storage.file) wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes)==0); else @@ -940,14 +944,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) int outofdate; int ret = 
-ENOSPC; void *paddr; + struct bitmap_storage *store = &bitmap->storage; chunks = bitmap->chunks; - file = bitmap->file; + file = store->file; if (!file && !bitmap->mddev->bitmap_info.offset) { /* No permanent bitmap - fill with '1s'. */ - bitmap->filemap = NULL; - bitmap->file_pages = 0; + store->filemap = NULL; + store->file_pages = 0; for (i = 0; i < chunks ; i++) { /* if the disk bit is set, set the memory bit */ int needed = ((sector_t)(i+1) << (bitmap->chunkshift) @@ -980,39 +985,40 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) ret = -ENOMEM; - bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); - if (!bitmap->filemap) + store->filemap = kmalloc(sizeof(struct page *) + * num_pages, GFP_KERNEL); + if (!store->filemap) goto err; pnum = 0; offset = 0; - if (bitmap->sb_page) { - bitmap->filemap[0] = bitmap->sb_page; + if (store->sb_page) { + store->filemap[0] = store->sb_page; pnum = 1; offset = sizeof(bitmap_super_t); } for ( ; pnum < num_pages; pnum++) { - bitmap->filemap[pnum] = alloc_page(GFP_KERNEL); - if (!bitmap->filemap[pnum]) { - bitmap->file_pages = pnum; + store->filemap[pnum] = alloc_page(GFP_KERNEL); + if (!store->filemap[pnum]) { + store->file_pages = pnum; goto err; } } - bitmap->file_pages = pnum; + store->file_pages = pnum; /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ - bitmap->filemap_attr = kzalloc( + store->filemap_attr = kzalloc( roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), GFP_KERNEL); - if (!bitmap->filemap_attr) + if (!store->filemap_attr) goto err; oldindex = ~0L; for (i = 0; i < chunks; i++) { int b; - index = file_page_index(bitmap, i); - bit = file_page_offset(bitmap, i); + index = file_page_index(&bitmap->storage, i); + bit = file_page_offset(&bitmap->storage, i); if (index != oldindex) { /* this is a new page, read it in */ int count; /* unmap the old page, we're done with it */ @@ -1020,7 +1026,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) count = bytes - index * PAGE_SIZE; else count = PAGE_SIZE; - page = bitmap->filemap[index]; + page = store->filemap[index]; if (file) ret = read_page(file, index, bitmap, count, page); @@ -1036,7 +1042,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) oldindex = index; - bitmap->last_page_size = count; + store->last_page_size = count; if (outofdate) { /* @@ -1074,7 +1080,8 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) printk(KERN_INFO "%s: bitmap initialized from disk: " "read %lu/%lu pages, set %lu of %lu bits\n", - bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, chunks); + bmname(bitmap), store->file_pages, + num_pages, bit_cnt, chunks); return 0; @@ -1091,14 +1098,14 @@ void bitmap_write_all(struct bitmap *bitmap) */ int i; - if (!bitmap || !bitmap->filemap) + if (!bitmap || !bitmap->storage.filemap) return; - if (bitmap->file) + if (bitmap->storage.file) /* Only one copy, so nothing needed */ return; spin_lock_irq(&bitmap->lock); - for (i = 0; i < bitmap->file_pages; i++) + for (i = 0; i < bitmap->storage.file_pages; i++) set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); bitmap->allclean = 0; @@ -1165,7 +1172,7 @@ void bitmap_daemon_work(struct mddev *mddev) * we will write it. 
*/ spin_lock_irqsave(&bitmap->lock, flags); - for (j = 0; j < bitmap->file_pages; j++) + for (j = 0; j < bitmap->storage.file_pages; j++) if (test_page_attr(bitmap, j, BITMAP_PAGE_PENDING)) { set_page_attr(bitmap, j, @@ -1180,8 +1187,8 @@ void bitmap_daemon_work(struct mddev *mddev) * other changes */ bitmap_super_t *sb; bitmap->need_sync = 0; - if (bitmap->filemap) { - sb = kmap_atomic(bitmap->sb_page); + if (bitmap->storage.filemap) { + sb = kmap_atomic(bitmap->storage.sb_page); sb->events_cleared = cpu_to_le64(bitmap->events_cleared); kunmap_atomic(sb); @@ -1233,7 +1240,7 @@ void bitmap_daemon_work(struct mddev *mddev) * the first blocking holds the superblock and it has been updated. * We mustn't write any other blocks before the superblock. */ - for (j = 0; j < bitmap->file_pages; j++) { + for (j = 0; j < bitmap->storage.file_pages; j++) { if (test_page_attr(bitmap, j, BITMAP_PAGE_DIRTY)) @@ -1244,9 +1251,9 @@ void bitmap_daemon_work(struct mddev *mddev) clear_page_attr(bitmap, j, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - write_page(bitmap, bitmap->filemap[j], 0); + write_page(bitmap, bitmap->storage.filemap[j], 0); spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->filemap) + if (!bitmap->storage.filemap) break; } } @@ -1700,7 +1707,7 @@ int bitmap_create(struct mddev *mddev) } else bitmap->sysfs_can_clear = NULL; - bitmap->file = file; + bitmap->storage.file = file; if (file) { get_file(file); /* As future accesses to this file will use bmap, @@ -1832,9 +1839,9 @@ void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) << (PAGE_SHIFT - 10), chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize, chunk_kb ? "KB" : "B"); - if (bitmap->file) { + if (bitmap->storage.file) { seq_printf(seq, ", file: "); - seq_path(seq, &bitmap->file->f_path, " \t\n"); + seq_path(seq, &bitmap->storage.file->f_path, " \t\n"); } seq_printf(seq, "\n"); @@ -1958,8 +1965,9 @@ space_store(struct mddev *mddev, const char *buf, size_t len) return -EINVAL; if (mddev->bitmap && - sectors < ((mddev->bitmap->file_pages - 1) * PAGE_SIZE - + mddev->bitmap->last_page_size + 511) >> 9) + sectors < ((mddev->bitmap->storage.file_pages - 1) + * PAGE_SIZE + + mddev->bitmap->storage.last_page_size + 511) >> 9) return -EFBIG; /* Bitmap is too big for this small space */ /* could make sure it isn't too big, but that isn't really diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 79e17983473a..162ab095b866 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -188,12 +188,17 @@ struct bitmap { /* bitmap spinlock */ spinlock_t lock; - struct file *file; /* backing disk file */ - struct page *sb_page; /* cached copy of the bitmap file superblock */ - struct page **filemap; /* list of cache pages for the file */ - unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ - unsigned long file_pages; /* number of pages in the file */ - int last_page_size; /* bytes in the last page */ + struct bitmap_storage { + struct file *file; /* backing disk file */ + struct page *sb_page; /* cached copy of the bitmap + * file superblock */ + struct page **filemap; /* list of cache pages for + * the file */ + unsigned long *filemap_attr; /* attributes associated + * w/ filemap pages */ + unsigned long file_pages; /* number of pages in the file*/ + int last_page_size; /* bytes in the last page */ + } storage; unsigned long flags; diff --git a/drivers/md/md.c b/drivers/md/md.c index 607771bb7e92..9e2336fbbd31 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ 
-1989,7 +1989,7 @@ super_1_allow_new_offset(struct md_rdev *rdev, bitmap = rdev->mddev->bitmap; if (bitmap && !rdev->mddev->bitmap_info.file && rdev->sb_start + rdev->mddev->bitmap_info.offset + - bitmap->file_pages * (PAGE_SIZE>>9) > new_offset) + bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset) return 0; if (rdev->badblocks.sector + rdev->badblocks.size > new_offset) return 0; @@ -5649,7 +5649,7 @@ static int get_bitmap_file(struct mddev * mddev, void __user * arg) goto out; /* bitmap disabled, zero the first byte and copy out */ - if (!mddev->bitmap || !mddev->bitmap->file) { + if (!mddev->bitmap || !mddev->bitmap->storage.file) { file->pathname[0] = '\0'; goto copy_out; } @@ -5658,7 +5658,8 @@ static int get_bitmap_file(struct mddev * mddev, void __user * arg) if (!buf) goto out; - ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname)); + ptr = d_path(&mddev->bitmap->storage.file->f_path, + buf, sizeof(file->pathname)); if (IS_ERR(ptr)) goto out; @@ -6299,7 +6300,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) /* remove the bitmap */ if (!mddev->bitmap) return -ENOENT; - if (mddev->bitmap->file) + if (mddev->bitmap->storage.file) return -EINVAL; mddev->pers->quiesce(mddev, 1); bitmap_destroy(mddev); -- cgit v1.2.3-59-g8ed1b From a4a6125a074e1b08ee8ae34f700c5bca19eb9d18 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 May 2012 13:55:27 +1000 Subject: md: allow array to be resized while bitmap is present. Now that bitmaps can be resized, we can allow an array to be resized while the bitmap is present. This only covers resizing that involves changing the effective size of member devices, not resizing that changes the number of devices. Signed-off-by: NeilBrown --- drivers/md/md.c | 6 +----- drivers/md/raid1.c | 11 +++++++++-- drivers/md/raid10.c | 10 ++++++++-- drivers/md/raid5.c | 14 ++++++++++---- 4 files changed, 28 insertions(+), 13 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9e2336fbbd31..86adf4ac46cf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6153,11 +6153,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) */ if (mddev->sync_thread) return -EBUSY; - if (mddev->bitmap) - /* Sorry, cannot grow a bitmap yet, just remove it, - * grow, and re-add. - */ - return -EBUSY; + rdev_for_each(rdev, mddev) { sector_t avail = rdev->sectors; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 22cfc6660b18..8e717bd518e7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2752,9 +2752,16 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) * any io in the removed space completes, but it hardly seems * worth it. 
*/ - md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0)); - if (mddev->array_sectors > raid1_size(mddev, sectors, 0)) + sector_t newsize = raid1_size(mddev, sectors, 0); + if (mddev->external_size && + mddev->array_sectors > newsize) return -EINVAL; + if (mddev->bitmap) { + int ret = bitmap_resize(mddev->bitmap, newsize, 0, 0); + if (ret) + return ret; + } + md_set_array_sectors(mddev, newsize); set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index fb9062b5022c..8fe3aa469987 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3678,9 +3678,15 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) oldsize = raid10_size(mddev, 0, 0); size = raid10_size(mddev, sectors, 0); - md_set_array_sectors(mddev, size); - if (mddev->array_sectors > size) + if (mddev->external_size && + mddev->array_sectors > size) return -EINVAL; + if (mddev->bitmap) { + int ret = bitmap_resize(mddev->bitmap, size, 0, 0); + if (ret) + return ret; + } + md_set_array_sectors(mddev, size); set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7bfd59b313d7..eab6168bb7f4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5503,12 +5503,18 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) * any io in the removed space completes, but it hardly seems * worth it. */ + sector_t newsize; sectors &= ~((sector_t)mddev->chunk_sectors - 1); - md_set_array_sectors(mddev, raid5_size(mddev, sectors, - mddev->raid_disks)); - if (mddev->array_sectors > - raid5_size(mddev, sectors, mddev->raid_disks)) + newsize = raid5_size(mddev, sectors, mddev->raid_disks); + if (mddev->external_size && + mddev->array_sectors > newsize) return -EINVAL; + if (mddev->bitmap) { + int ret = bitmap_resize(mddev->bitmap, sectors, 0, 0); + if (ret) + return ret; + } + md_set_array_sectors(mddev, newsize); set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && -- cgit v1.2.3-59-g8ed1b From 47525e59e40ffb8cbc944c0055e9c4902cd3ee99 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 22 May 2012 13:55:29 +1000 Subject: DM RAID: Set recovery flags on resume Properly initialize MD recovery flags when resuming device-mapper devices. When a device-mapper device is suspended, all I/O must stop. This is done by calling 'md_stop_writes' and 'mddev_suspend'. These calls in turn manipulate the recovery flags - including setting 'MD_RECOVERY_FROZEN'. The DM device may have been suspended while recovery was not yet complete, so the process needs to pick up where it left off. Since 'mddev_resume' does not unset 'MD_RECOVERY_FROZEN' and set 'MD_RECOVERY_NEEDED', we must do it ourselves. 'MD_RECOVERY_NEEDED' can safely be set in 'mddev_resume', but 'MD_RECOVERY_FROZEN' must be cleared outside of 'mddev_resume' due to how MD handles RAID reshaping. (e.g., it is possible for a user to delay reshaping a RAID5->RAID6 by purposefully setting 'MD_RECOVERY_FROZEN'; clearing it in 'mddev_resume' would override the desired behavior.) Because 'mddev_resume' already unconditionally calls 'md_wakeup_thread(mddev->thread)', there is no need to make this call from 'raid_resume' since it calls 'mddev_resume'. Also clean up where level_store calls mddev_resume() - it currently duplicates some of the functions of that call.
- NB Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- drivers/md/dm-raid.c | 4 ++-- drivers/md/md.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 73a068da10d9..ea2d90c78f78 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1252,9 +1252,9 @@ static void raid_resume(struct dm_target *ti) if (!rs->bitmap_loaded) { bitmap_load(&rs->md); rs->bitmap_loaded = 1; - } else - md_wakeup_thread(rs->md.thread); + } + clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); mddev_resume(&rs->md); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 86adf4ac46cf..4c9836885d3e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -402,6 +402,7 @@ void mddev_resume(struct mddev *mddev) wake_up(&mddev->sb_wait); mddev->pers->quiesce(mddev, 0); + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ } @@ -3673,10 +3674,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len) del_timer_sync(&mddev->safemode_timer); } pers->run(mddev); - mddev_resume(mddev); set_bit(MD_CHANGE_DEVS, &mddev->flags); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); + mddev_resume(mddev); sysfs_notify(&mddev->kobj, NULL, "level"); md_new_event(mddev); return rv; -- cgit v1.2.3-59-g8ed1b From 0c098220e2320c1f0c9339d0ff05c5e04672133a Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Tue, 22 May 2012 13:55:32 +1000 Subject: md: check the return of mddev_find() Check the return of mddev_find(), since it may fail due to running out of memory or out of usable minor numbers. The reason I chose -ENODEV instead of -ENOMEM or something else is that the md_alloc() function chose that ;) Signed-off-by: Yuanhan Liu Signed-off-by: NeilBrown --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 4c9836885d3e..1c2f9048e1ae 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6618,6 +6618,9 @@ static int md_open(struct block_device *bdev, fmode_t mode) struct mddev *mddev = mddev_find(bdev->bd_dev); int err; + if (!mddev) + return -ENODEV; + if (mddev->gendisk != bdev->bd_disk) { /* we are racing with mddev_put which is discarding this * bd_disk. -- cgit v1.2.3-59-g8ed1b
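
The last patch above is a straightforward defensive check: mddev_find() can return NULL (allocation failure, or no usable minor number left), so md_open() must test the result before dereferencing it. The sketch below shows the same pattern in plain userspace C; it is an illustration only, not kernel code, and device_find(), device_open() and struct device here are invented stand-ins for mddev_find(), md_open() and struct mddev.

/* Illustration only: check a lookup/allocation helper for NULL before
 * use, mirroring the md_open() change above.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct device {
	int minor;
	int open_count;
};

/* Stand-in for mddev_find(): may return NULL on failure. */
static struct device *device_find(int minor)
{
	struct device *dev;

	if (minor < 0)		/* no usable minor number */
		return NULL;
	dev = calloc(1, sizeof(*dev));
	if (!dev)		/* out of memory */
		return NULL;
	dev->minor = minor;
	return dev;
}

/* Stand-in for md_open(): bail out with -ENODEV before touching dev. */
static int device_open(int minor)
{
	struct device *dev = device_find(minor);

	if (!dev)
		return -ENODEV;

	dev->open_count++;	/* safe: dev is known to be valid here */
	printf("opened minor %d\n", dev->minor);
	free(dev);
	return 0;
}

int main(void)
{
	printf("open(3)  -> %d\n", device_open(3));	/* succeeds */
	printf("open(-1) -> %d\n", device_open(-1));	/* fails with -ENODEV */
	return 0;
}

Returning -ENODEV (rather than -ENOMEM) keeps the sketch consistent with the choice the commit message explains.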
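
The DM RAID resume patch earlier in this series rests on a division of responsibility: the generic resume path may always request recovery, but only the caller that knows why the array was frozen may clear the freeze. The sketch below models that split in standalone C; it is not md or dm-raid code, and all names (array_state, dm_suspend, generic_resume, dm_resume, RECOVERY_FROZEN, RECOVERY_NEEDED) are invented for the example.

/* Illustration only: a simplified flag model showing why the freeze bit
 * is cleared by the dm-raid caller rather than inside the generic resume.
 */
#include <stdio.h>

#define RECOVERY_FROZEN  (1u << 0)	/* recovery deliberately paused */
#define RECOVERY_NEEDED  (1u << 1)	/* recovery should be (re)started */

struct array_state {
	unsigned int recovery;
};

/* Analogue of the DM suspend path: it freezes recovery. */
static void dm_suspend(struct array_state *a)
{
	a->recovery |= RECOVERY_FROZEN;
}

/* Analogue of mddev_resume(): requesting recovery is always safe, but it
 * must not clear RECOVERY_FROZEN, because it cannot tell a DM suspend
 * from a freeze the user asked for (for example, to delay a reshape). */
static void generic_resume(struct array_state *a)
{
	a->recovery |= RECOVERY_NEEDED;
}

/* Analogue of raid_resume(): this caller knows the freeze came from its
 * own suspend path, so it clears the flag before resuming. */
static void dm_resume(struct array_state *a)
{
	a->recovery &= ~RECOVERY_FROZEN;
	generic_resume(a);
}

int main(void)
{
	struct array_state a = { .recovery = 0 };

	dm_suspend(&a);
	dm_resume(&a);
	printf("frozen=%d needed=%d\n",
	       !!(a.recovery & RECOVERY_FROZEN),
	       !!(a.recovery & RECOVERY_NEEDED));	/* frozen=0 needed=1 */
	return 0;
}

With this split, a user who froze recovery on purpose and never goes through dm_resume() keeps the freeze, while a plain suspend/resume cycle ends with recovery requested, which is the behaviour the patch describes.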