aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-raid.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r--drivers/md/dm-raid.c164
1 files changed, 88 insertions, 76 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b0aa595e4375..c412eaa975fc 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -209,6 +209,7 @@ struct raid_dev {
#define RT_FLAG_RS_SUSPENDED 5
#define RT_FLAG_RS_IN_SYNC 6
#define RT_FLAG_RS_RESYNCING 7
+#define RT_FLAG_RS_GROW 8
/* Array elements of 64 bit needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -241,6 +242,9 @@ struct raid_set {
struct raid_type *raid_type;
struct dm_target_callbacks callbacks;
+ sector_t array_sectors;
+ sector_t dev_sectors;
+
/* Optional raid4/5/6 journal device */
struct journal_dev {
struct dm_dev *dev;
@@ -616,7 +620,6 @@ static int raid10_format_to_md_layout(struct raid_set *rs,
} else if (algorithm == ALGORITHM_RAID10_FAR) {
f = copies;
- r = !RAID10_OFFSET;
if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
r |= RAID10_USE_FAR_SETS;
@@ -1615,13 +1618,12 @@ static int _check_data_dev_sectors(struct raid_set *rs)
}
/* Calculate the sectors per device and per array used for @rs */
-static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
+static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
{
int delta_disks;
unsigned int data_stripes;
+ sector_t array_sectors = sectors, dev_sectors = sectors;
struct mddev *mddev = &rs->md;
- struct md_rdev *rdev;
- sector_t array_sectors = rs->ti->len, dev_sectors = rs->ti->len;
if (use_mddev) {
delta_disks = mddev->delta_disks;
@@ -1656,12 +1658,9 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
/* Striped layouts */
array_sectors = (data_stripes + delta_disks) * dev_sectors;
- rdev_for_each(rdev, mddev)
- if (!test_bit(Journal, &rdev->flags))
- rdev->sectors = dev_sectors;
-
mddev->array_sectors = array_sectors;
mddev->dev_sectors = dev_sectors;
+ rs_set_rdev_sectors(rs);
return _check_data_dev_sectors(rs);
bad:
@@ -1670,7 +1669,7 @@ bad:
}
/* Setup recovery on @rs */
-static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
+static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
{
/* raid0 does not recover */
if (rs_is_raid0(rs))
@@ -1691,22 +1690,6 @@ static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
? MaxSector : dev_sectors;
}
-/* Setup recovery on @rs based on raid type, device size and 'nosync' flag */
-static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
-{
- if (!dev_sectors)
- /* New raid set or 'sync' flag provided */
- __rs_setup_recovery(rs, 0);
- else if (dev_sectors == MaxSector)
- /* Prevent recovery */
- __rs_setup_recovery(rs, MaxSector);
- else if (__rdev_sectors(rs) < dev_sectors)
- /* Grown raid set */
- __rs_setup_recovery(rs, __rdev_sectors(rs));
- else
- __rs_setup_recovery(rs, MaxSector);
-}
-
static void do_table_event(struct work_struct *ws)
{
struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
@@ -2474,7 +2457,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
return -EINVAL;
}
- /* Enable bitmap creation for RAID levels != 0 */
+ /* Enable bitmap creation on @rs unless no metadevs or raid0 or journaled raid4/5/6 set. */
mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
@@ -2911,7 +2894,7 @@ static int rs_setup_reshape(struct raid_set *rs)
/* Remove disk(s) */
} else if (rs->delta_disks < 0) {
- r = rs_set_dev_and_array_sectors(rs, true);
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true);
mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */
/* Change layout and/or chunk size */
@@ -3008,7 +2991,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
bool resize = false;
struct raid_type *rt;
unsigned int num_raid_params, num_raid_devs;
- sector_t calculated_dev_sectors, rdev_sectors, reshape_sectors;
+ sector_t sb_array_sectors, rdev_sectors, reshape_sectors;
struct raid_set *rs = NULL;
const char *arg;
struct rs_layout rs_layout;
@@ -3067,11 +3050,13 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
*
* Any existing superblock will overwrite the array and device sizes
*/
- r = rs_set_dev_and_array_sectors(rs, false);
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
if (r)
goto bad;
- calculated_dev_sectors = rs->md.dev_sectors;
+ /* Memorize just calculated, potentially larger sizes to grow the raid set in preresume */
+ rs->array_sectors = rs->md.array_sectors;
+ rs->dev_sectors = rs->md.dev_sectors;
/*
* Backup any new raid set level, layout, ...
@@ -3084,6 +3069,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
goto bad;
+ /* All in-core metadata now as of current superblocks after calling analyse_superblocks() */
+ sb_array_sectors = rs->md.array_sectors;
rdev_sectors = __rdev_sectors(rs);
if (!rdev_sectors) {
ti->error = "Invalid rdev size";
@@ -3093,8 +3080,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
reshape_sectors = _get_reshape_sectors(rs);
- if (calculated_dev_sectors != rdev_sectors)
- resize = calculated_dev_sectors != (reshape_sectors ? rdev_sectors - reshape_sectors : rdev_sectors);
+ if (rs->dev_sectors != rdev_sectors) {
+ resize = (rs->dev_sectors != rdev_sectors - reshape_sectors);
+ if (rs->dev_sectors > rdev_sectors - reshape_sectors)
+ set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+ }
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
@@ -3121,13 +3111,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
rs_set_new(rs);
} else if (rs_is_recovering(rs)) {
- /* Rebuild particular devices */
- if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
- set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
- rs_setup_recovery(rs, MaxSector);
- }
/* A recovering raid set may be resized */
- ; /* skip setup rs */
+ goto size_check;
} else if (rs_is_reshaping(rs)) {
/* Have to reject size change request during reshape */
if (resize) {
@@ -3171,6 +3156,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
rs_setup_recovery(rs, MaxSector);
rs_set_new(rs);
} else if (rs_reshape_requested(rs)) {
+ /* Only request grow on raid set size extensions, not on reshapes. */
+ clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+
/*
* No need to check for 'ongoing' takeover here, because takeover
* is an instant operation as oposed to an ongoing reshape.
@@ -3201,13 +3189,31 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
rs_set_cur(rs);
} else {
+size_check:
/* May not set recovery when a device rebuild is requested */
if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
- rs_setup_recovery(rs, MaxSector);
+ clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
- } else
- rs_setup_recovery(rs, test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ?
- 0 : (resize ? calculated_dev_sectors : MaxSector));
+ rs_setup_recovery(rs, MaxSector);
+ } else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+ /*
+ * Set raid set to current size, i.e. size as of
+ * superblocks to grow to larger size in preresume.
+ */
+ r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false);
+ if (r)
+ goto bad;
+
+ rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);
+ } else {
+ /* This is no size change or it is shrinking, update size and record in superblocks */
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
+ if (r)
+ goto bad;
+
+ if (sb_array_sectors > rs->array_sectors)
+ set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
+ }
rs_set_cur(rs);
}
@@ -3406,10 +3412,9 @@ static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)
/* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resynching */
static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
- sector_t resync_max_sectors)
+ enum sync_state state, sector_t resync_max_sectors)
{
sector_t r;
- enum sync_state state;
struct mddev *mddev = &rs->md;
clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
@@ -3420,8 +3425,6 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
- state = decipher_sync_action(mddev, recovery);
-
if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
r = mddev->recovery_cp;
else
@@ -3439,18 +3442,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
/*
* In case we are recovering, the array is not in sync
* and health chars should show the recovering legs.
+ *
+ * Already retrieved recovery offset from curr_resync_completed above.
*/
;
- else if (state == st_resync)
- /*
- * If "resync" is occurring, the raid set
- * is or may be out of sync hence the health
- * characters shall be 'a'.
- */
- set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
- else if (state == st_reshape)
+
+ else if (state == st_resync || state == st_reshape)
/*
- * If "reshape" is occurring, the raid set
+ * If "resync/reshape" is occurring, the raid set
* is or may be out of sync hence the health
* characters shall be 'a'.
*/
@@ -3464,22 +3463,22 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
*/
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
- else {
- struct md_rdev *rdev;
-
+ else if (test_bit(MD_RECOVERY_NEEDED, &recovery))
/*
* We are idle and recovery is needed, prevent 'A' chars race
* caused by components still set to in-sync by constructor.
*/
- if (test_bit(MD_RECOVERY_NEEDED, &recovery))
- set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
+ set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
+ else {
/*
- * The raid set may be doing an initial sync, or it may
- * be rebuilding individual components. If all the
- * devices are In_sync, then it is the raid set that is
- * being initialized.
+ * We are idle and the raid set may be doing an initial
+ * sync, or it may be rebuilding individual components.
+ * If all the devices are In_sync, then it is the raid set
+ * that is being initialized.
*/
+ struct md_rdev *rdev;
+
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
rdev_for_each(rdev, mddev)
if (!test_bit(Journal, &rdev->flags) &&
@@ -3512,7 +3511,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
unsigned int rebuild_disks;
unsigned int write_mostly_params = 0;
sector_t progress, resync_max_sectors, resync_mismatches;
- const char *sync_action;
+ enum sync_state state;
struct raid_type *rt;
switch (type) {
@@ -3526,14 +3525,14 @@ static void raid_status(struct dm_target *ti, status_type_t type,
/* Access most recent mddev properties for status output */
smp_rmb();
- recovery = rs->md.recovery;
/* Get sensible max sectors even if raid set not yet started */
resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?
mddev->resync_max_sectors : mddev->dev_sectors;
- progress = rs_get_progress(rs, recovery, resync_max_sectors);
+ recovery = rs->md.recovery;
+ state = decipher_sync_action(mddev, recovery);
+ progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
atomic64_read(&mddev->resync_mismatches) : 0;
- sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
for (i = 0; i < rs->raid_disks; i++)
@@ -3561,7 +3560,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
* See Documentation/admin-guide/device-mapper/dm-raid.rst for
* information on each of these states.
*/
- DMEMIT(" %s", sync_action);
+ DMEMIT(" %s", sync_str(state));
/*
* v1.5.0+:
@@ -3955,11 +3954,22 @@ static int raid_preresume(struct dm_target *ti)
if (r)
return r;
- /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
- if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
- mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
- r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors,
- to_bytes(rs->requested_bitmap_chunk_sectors), 0);
+ /* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */
+ if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+ mddev->array_sectors = rs->array_sectors;
+ mddev->dev_sectors = rs->dev_sectors;
+ rs_set_rdev_sectors(rs);
+ rs_set_capacity(rs);
+ }
+
+ /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */
+ if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
+ (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) ||
+ (rs->requested_bitmap_chunk_sectors &&
+ mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
+ int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;
+
+ r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
if (r)
DMERR("Failed to resize bitmap");
}
@@ -3968,8 +3978,10 @@ static int raid_preresume(struct dm_target *ti)
/* Be prepared for mddev_resume() in raid_resume() */
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
- set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
mddev->resync_min = mddev->recovery_cp;
+ if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
+ mddev->resync_max_sectors = mddev->dev_sectors;
}
/* Check for any reshape request unless new raid set */
@@ -4017,7 +4029,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 14, 0},
+ .version = {1, 15, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,