aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c255
1 files changed, 200 insertions, 55 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 805b33e27496..4824d50526fa 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -125,74 +125,165 @@ static inline int speed_max(struct mddev *mddev)
mddev->sync_speed_max : sysctl_speed_limit_max;
}
-static int rdev_init_wb(struct md_rdev *rdev)
+static void rdev_uninit_serial(struct md_rdev *rdev)
{
- if (rdev->bdev->bd_queue->nr_hw_queues == 1)
+ if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
+ return;
+
+ kvfree(rdev->serial);
+ rdev->serial = NULL;
+}
+
+static void rdevs_uninit_serial(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+
+ rdev_for_each(rdev, mddev)
+ rdev_uninit_serial(rdev);
+}
+
+static int rdev_init_serial(struct md_rdev *rdev)
+{
+ /* serial_nums equals with BARRIER_BUCKETS_NR */
+ int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
+ struct serial_in_rdev *serial = NULL;
+
+ if (test_bit(CollisionCheck, &rdev->flags))
return 0;
- spin_lock_init(&rdev->wb_list_lock);
- INIT_LIST_HEAD(&rdev->wb_list);
- init_waitqueue_head(&rdev->wb_io_wait);
- set_bit(WBCollisionCheck, &rdev->flags);
+ serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
+ GFP_KERNEL);
+ if (!serial)
+ return -ENOMEM;
- return 1;
+ for (i = 0; i < serial_nums; i++) {
+ struct serial_in_rdev *serial_tmp = &serial[i];
+
+ spin_lock_init(&serial_tmp->serial_lock);
+ serial_tmp->serial_rb = RB_ROOT_CACHED;
+ init_waitqueue_head(&serial_tmp->serial_io_wait);
+ }
+
+ rdev->serial = serial;
+ set_bit(CollisionCheck, &rdev->flags);
+
+ return 0;
+}
+
+static int rdevs_init_serial(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+ int ret = 0;
+
+ rdev_for_each(rdev, mddev) {
+ ret = rdev_init_serial(rdev);
+ if (ret)
+ break;
+ }
+
+ /* Free all resources if pool is not existed */
+ if (ret && !mddev->serial_info_pool)
+ rdevs_uninit_serial(mddev);
+
+ return ret;
}
/*
- * Create wb_info_pool if rdev is the first multi-queue device flaged
- * with writemostly, also write-behind mode is enabled.
+ * rdev needs to enable serial stuffs if it meets the conditions:
+ * 1. it is multi-queue device flaged with writemostly.
+ * 2. the write-behind mode is enabled.
*/
-void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
- bool is_suspend)
+static int rdev_need_serial(struct md_rdev *rdev)
{
- if (mddev->bitmap_info.max_write_behind == 0)
- return;
+ return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
+ rdev->bdev->bd_queue->nr_hw_queues != 1 &&
+ test_bit(WriteMostly, &rdev->flags));
+}
+
+/*
+ * Init resource for rdev(s), then create serial_info_pool if:
+ * 1. rdev is the first device which return true from rdev_enable_serial.
+ * 2. rdev is NULL, means we want to enable serialization for all rdevs.
+ */
+void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
+ bool is_suspend)
+{
+ int ret = 0;
- if (!test_bit(WriteMostly, &rdev->flags) || !rdev_init_wb(rdev))
+ if (rdev && !rdev_need_serial(rdev) &&
+ !test_bit(CollisionCheck, &rdev->flags))
return;
- if (mddev->wb_info_pool == NULL) {
+ if (!is_suspend)
+ mddev_suspend(mddev);
+
+ if (!rdev)
+ ret = rdevs_init_serial(mddev);
+ else
+ ret = rdev_init_serial(rdev);
+ if (ret)
+ goto abort;
+
+ if (mddev->serial_info_pool == NULL) {
unsigned int noio_flag;
- if (!is_suspend)
- mddev_suspend(mddev);
noio_flag = memalloc_noio_save();
- mddev->wb_info_pool = mempool_create_kmalloc_pool(NR_WB_INFOS,
- sizeof(struct wb_info));
+ mddev->serial_info_pool =
+ mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
+ sizeof(struct serial_info));
memalloc_noio_restore(noio_flag);
- if (!mddev->wb_info_pool)
- pr_err("can't alloc memory pool for writemostly\n");
- if (!is_suspend)
- mddev_resume(mddev);
+ if (!mddev->serial_info_pool) {
+ rdevs_uninit_serial(mddev);
+ pr_err("can't alloc memory pool for serialization\n");
+ }
}
+
+abort:
+ if (!is_suspend)
+ mddev_resume(mddev);
}
-EXPORT_SYMBOL_GPL(mddev_create_wb_pool);
/*
- * destroy wb_info_pool if rdev is the last device flaged with WBCollisionCheck.
+ * Free resource from rdev(s), and destroy serial_info_pool under conditions:
+ * 1. rdev is the last device flaged with CollisionCheck.
+ * 2. when bitmap is destroyed while policy is not enabled.
+ * 3. for disable policy, the pool is destroyed only when no rdev needs it.
*/
-static void mddev_destroy_wb_pool(struct mddev *mddev, struct md_rdev *rdev)
+void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
+ bool is_suspend)
{
- if (!test_and_clear_bit(WBCollisionCheck, &rdev->flags))
+ if (rdev && !test_bit(CollisionCheck, &rdev->flags))
return;
- if (mddev->wb_info_pool) {
+ if (mddev->serial_info_pool) {
struct md_rdev *temp;
- int num = 0;
+ int num = 0; /* used to track if other rdevs need the pool */
- /*
- * Check if other rdevs need wb_info_pool.
- */
- rdev_for_each(temp, mddev)
- if (temp != rdev &&
- test_bit(WBCollisionCheck, &temp->flags))
+ if (!is_suspend)
+ mddev_suspend(mddev);
+ rdev_for_each(temp, mddev) {
+ if (!rdev) {
+ if (!mddev->serialize_policy ||
+ !rdev_need_serial(temp))
+ rdev_uninit_serial(temp);
+ else
+ num++;
+ } else if (temp != rdev &&
+ test_bit(CollisionCheck, &temp->flags))
num++;
- if (!num) {
- mddev_suspend(rdev->mddev);
- mempool_destroy(mddev->wb_info_pool);
- mddev->wb_info_pool = NULL;
- mddev_resume(rdev->mddev);
}
+
+ if (rdev)
+ rdev_uninit_serial(rdev);
+
+ if (num)
+ pr_info("The mempool could be used by other devices\n");
+ else {
+ mempool_destroy(mddev->serial_info_pool);
+ mddev->serial_info_pool = NULL;
+ }
+ if (!is_suspend)
+ mddev_resume(mddev);
}
}
@@ -1159,6 +1250,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
/* not spare disk, or LEVEL_MULTIPATH */
if (sb->level == LEVEL_MULTIPATH ||
(rdev->desc_nr >= 0 &&
+ rdev->desc_nr < MD_SB_DISKS &&
sb->disks[rdev->desc_nr].state &
((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
spare_disk = false;
@@ -2336,7 +2428,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
pr_debug("md: bind<%s>\n", b);
if (mddev->raid_disks)
- mddev_create_wb_pool(mddev, rdev, false);
+ mddev_create_serial_pool(mddev, rdev, false);
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
goto fail;
@@ -2374,7 +2466,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set);
pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
- mddev_destroy_wb_pool(rdev->mddev, rdev);
+ mddev_destroy_serial_pool(rdev->mddev, rdev, false);
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
@@ -2887,10 +2979,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
}
} else if (cmd_match(buf, "writemostly")) {
set_bit(WriteMostly, &rdev->flags);
- mddev_create_wb_pool(rdev->mddev, rdev, false);
+ mddev_create_serial_pool(rdev->mddev, rdev, false);
err = 0;
} else if (cmd_match(buf, "-writemostly")) {
- mddev_destroy_wb_pool(rdev->mddev, rdev);
+ mddev_destroy_serial_pool(rdev->mddev, rdev, false);
clear_bit(WriteMostly, &rdev->flags);
err = 0;
} else if (cmd_match(buf, "blocked")) {
@@ -5276,6 +5368,57 @@ static struct md_sysfs_entry md_fail_last_dev =
__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
fail_last_dev_store);
+static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
+{
+ if (mddev->pers == NULL || (mddev->pers->level != 1))
+ return sprintf(page, "n/a\n");
+ else
+ return sprintf(page, "%d\n", mddev->serialize_policy);
+}
+
+/*
+ * Setting serialize_policy to true to enforce write IO is not reordered
+ * for raid1.
+ */
+static ssize_t
+serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
+{
+ int err;
+ bool value;
+
+ err = kstrtobool(buf, &value);
+ if (err)
+ return err;
+
+ if (value == mddev->serialize_policy)
+ return len;
+
+ err = mddev_lock(mddev);
+ if (err)
+ return err;
+ if (mddev->pers == NULL || (mddev->pers->level != 1)) {
+ pr_err("md: serialize_policy is only effective for raid1\n");
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ mddev_suspend(mddev);
+ if (value)
+ mddev_create_serial_pool(mddev, NULL, true);
+ else
+ mddev_destroy_serial_pool(mddev, NULL, true);
+ mddev->serialize_policy = value;
+ mddev_resume(mddev);
+unlock:
+ mddev_unlock(mddev);
+ return err ?: len;
+}
+
+static struct md_sysfs_entry md_serialize_policy =
+__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
+ serialize_policy_store);
+
+
static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_layout.attr,
@@ -5293,6 +5436,7 @@ static struct attribute *md_default_attrs[] = {
&max_corr_read_errors.attr,
&md_consistency_policy.attr,
&md_fail_last_dev.attr,
+ &md_serialize_policy.attr,
NULL,
};
@@ -5768,18 +5912,18 @@ int md_run(struct mddev *mddev)
goto bitmap_abort;
if (mddev->bitmap_info.max_write_behind > 0) {
- bool creat_pool = false;
+ bool create_pool = false;
rdev_for_each(rdev, mddev) {
if (test_bit(WriteMostly, &rdev->flags) &&
- rdev_init_wb(rdev))
- creat_pool = true;
- }
- if (creat_pool && mddev->wb_info_pool == NULL) {
- mddev->wb_info_pool =
- mempool_create_kmalloc_pool(NR_WB_INFOS,
- sizeof(struct wb_info));
- if (!mddev->wb_info_pool) {
+ rdev_init_serial(rdev))
+ create_pool = true;
+ }
+ if (create_pool && mddev->serial_info_pool == NULL) {
+ mddev->serial_info_pool =
+ mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
+ sizeof(struct serial_info));
+ if (!mddev->serial_info_pool) {
err = -ENOMEM;
goto bitmap_abort;
}
@@ -6024,8 +6168,9 @@ static void __md_stop_writes(struct mddev *mddev)
mddev->in_sync = 1;
md_update_sb(mddev, 1);
}
- mempool_destroy(mddev->wb_info_pool);
- mddev->wb_info_pool = NULL;
+ /* disable policy to guarantee rdevs free resources for serialization */
+ mddev->serialize_policy = 0;
+ mddev_destroy_serial_pool(mddev, NULL, true);
}
void md_stop_writes(struct mddev *mddev)