aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c168
1 files changed, 75 insertions, 93 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 98bac4f304ae..ca409428b4fc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -464,7 +464,7 @@ struct md_io {
bio_end_io_t *orig_bi_end_io;
void *orig_bi_private;
unsigned long start_time;
- struct hd_struct *part;
+ struct block_device *part;
};
static void md_end_io(struct bio *bio)
@@ -639,7 +639,7 @@ static void md_submit_flush_data(struct work_struct *ws)
* could wait for this and below md_handle_request could wait for those
* bios because of suspend check
*/
- mddev->last_flush = mddev->start_flush;
+ mddev->prev_flush_start = mddev->start_flush;
mddev->flush_bio = NULL;
wake_up(&mddev->sb_wait);
@@ -660,13 +660,17 @@ static void md_submit_flush_data(struct work_struct *ws)
*/
bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
- ktime_t start = ktime_get_boottime();
+ ktime_t req_start = ktime_get_boottime();
spin_lock_irq(&mddev->lock);
+ /* flush requests wait until ongoing flush completes,
+ * hence coalescing all the pending requests.
+ */
wait_event_lock_irq(mddev->sb_wait,
!mddev->flush_bio ||
- ktime_after(mddev->last_flush, start),
+ ktime_before(req_start, mddev->prev_flush_start),
mddev->lock);
- if (!ktime_after(mddev->last_flush, start)) {
+ /* new request after previous flush is completed */
+ if (ktime_after(req_start, mddev->prev_flush_start)) {
WARN_ON(mddev->flush_bio);
mddev->flush_bio = bio;
bio = NULL;
@@ -2414,7 +2418,6 @@ EXPORT_SYMBOL(md_integrity_add_rdev);
static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
{
char b[BDEVNAME_SIZE];
- struct kobject *ko;
int err;
/* prevent duplicates */
@@ -2477,9 +2480,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
goto fail;
- ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
/* failure here is OK */
- err = sysfs_create_link(&rdev->kobj, ko, "block");
+ err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
rdev->sysfs_unack_badblocks =
sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
@@ -5355,10 +5357,9 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len)
if (!err) {
mddev->array_sectors = sectors;
- if (mddev->pers) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
- }
+ if (mddev->pers)
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
mddev_unlock(mddev);
return err ?: len;
@@ -5765,11 +5766,12 @@ static int md_alloc(dev_t dev, char *name)
return error;
}
-static struct kobject *md_probe(dev_t dev, int *part, void *data)
+static void md_probe(dev_t dev)
{
+ if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
+ return;
if (create_on_open)
md_alloc(dev, NULL);
- return NULL;
}
static int add_named_array(const char *val, const struct kernel_param *kp)
@@ -6107,8 +6109,7 @@ int do_md_run(struct mddev *mddev)
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
clear_bit(MD_NOT_READY, &mddev->flags);
mddev->changed = 1;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
@@ -6423,10 +6424,9 @@ static int do_md_stop(struct mddev *mddev, int mode,
if (rdev->raid_disk >= 0)
sysfs_unlink_rdev(mddev, rdev);
- set_capacity(disk, 0);
+ set_capacity_and_notify(disk, 0);
mutex_unlock(&mddev->open_mutex);
mddev->changed = 1;
- revalidate_disk_size(disk, true);
if (mddev->ro)
mddev->ro = 0;
@@ -6535,7 +6535,7 @@ static void autorun_devices(int part)
break;
}
- md_probe(dev, NULL, NULL);
+ md_probe(dev);
mddev = mddev_find(dev);
if (!mddev || !mddev->gendisk) {
if (mddev)
@@ -6948,8 +6948,10 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
goto busy;
kick_rdev:
- if (mddev_is_clustered(mddev))
- md_cluster_ops->remove_disk(mddev, rdev);
+ if (mddev_is_clustered(mddev)) {
+ if (md_cluster_ops->remove_disk(mddev, rdev))
+ goto busy;
+ }
md_kick_rdev_from_array(rdev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
@@ -7257,8 +7259,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (mddev_is_clustered(mddev))
md_cluster_ops->update_size(mddev, old_dev_sectors);
else if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
}
return rv;
@@ -7278,6 +7280,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
return -EINVAL;
if (mddev->sync_thread ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
mddev->reshape_position != MaxSector)
return -EBUSY;
@@ -7480,7 +7483,6 @@ static inline bool md_ioctl_valid(unsigned int cmd)
{
switch (cmd) {
case ADD_NEW_DISK:
- case BLKROSET:
case GET_ARRAY_INFO:
case GET_BITMAP_FILE:
case GET_DISK_INFO:
@@ -7507,7 +7509,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
int err = 0;
void __user *argp = (void __user *)arg;
struct mddev *mddev = NULL;
- int ro;
bool did_set_md_closing = false;
if (!md_ioctl_valid(cmd))
@@ -7590,8 +7591,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
err = -EBUSY;
goto out;
}
- WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
- set_bit(MD_CLOSING, &mddev->flags);
+ if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
+ mutex_unlock(&mddev->open_mutex);
+ err = -EBUSY;
+ goto out;
+ }
did_set_md_closing = true;
mutex_unlock(&mddev->open_mutex);
sync_blockdev(bdev);
@@ -7687,35 +7691,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
goto unlock;
}
break;
-
- case BLKROSET:
- if (get_user(ro, (int __user *)(arg))) {
- err = -EFAULT;
- goto unlock;
- }
- err = -EINVAL;
-
- /* if the bdev is going readonly the value of mddev->ro
- * does not matter, no writes are coming
- */
- if (ro)
- goto unlock;
-
- /* are we are already prepared for writes? */
- if (mddev->ro != 1)
- goto unlock;
-
- /* transitioning to readauto need only happen for
- * arrays that call md_write_start
- */
- if (mddev->pers) {
- err = restart_array(mddev);
- if (err == 0) {
- mddev->ro = 2;
- set_disk_ro(mddev->gendisk, 0);
- }
- }
- goto unlock;
}
/*
@@ -7809,6 +7784,36 @@ static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif /* CONFIG_COMPAT */
+static int md_set_read_only(struct block_device *bdev, bool ro)
+{
+ struct mddev *mddev = bdev->bd_disk->private_data;
+ int err;
+
+ err = mddev_lock(mddev);
+ if (err)
+ return err;
+
+ if (!mddev->raid_disks && !mddev->external) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+
+ /*
+ * Transitioning to read-auto need only happen for arrays that call
+ * md_write_start and which are not ready for writes yet.
+ */
+ if (!ro && mddev->ro == 1 && mddev->pers) {
+ err = restart_array(mddev);
+ if (err)
+ goto out_unlock;
+ mddev->ro = 2;
+ }
+
+out_unlock:
+ mddev_unlock(mddev);
+ return err;
+}
+
static int md_open(struct block_device *bdev, fmode_t mode)
{
/*
@@ -7886,6 +7891,7 @@ const struct block_device_operations md_fops =
#endif
.getgeo = md_getgeo,
.check_events = md_check_events,
+ .set_read_only = md_set_read_only,
};
static int md_thread(void *arg)
@@ -8445,7 +8451,7 @@ static int is_mddev_idle(struct mddev *mddev, int init)
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_disk;
- curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
+ curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
* as sync_io is counted when a request starts, and
@@ -8582,26 +8588,6 @@ void md_write_end(struct mddev *mddev)
EXPORT_SYMBOL(md_write_end);
-/* This is used by raid0 and raid10 */
-void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
- struct bio *bio, sector_t start, sector_t size)
-{
- struct bio *discard_bio = NULL;
-
- if (__blkdev_issue_discard(rdev->bdev, start, size,
- GFP_NOIO, 0, &discard_bio) || !discard_bio)
- return;
-
- bio_chain(discard_bio, bio);
- bio_clone_blkg_association(discard_bio, bio);
- if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(rdev->bdev),
- discard_bio, disk_devt(mddev->gendisk),
- bio->bi_iter.bi_sector);
- submit_bio_noacct(discard_bio);
-}
-EXPORT_SYMBOL(md_submit_discard_bio);
-
/* md_allow_write(mddev)
* Calling this ensures that the array is marked 'active' so that writes
* may proceed without blocking. It is important to call this before
@@ -9035,10 +9021,9 @@ void md_do_sync(struct md_thread *thread)
mddev_lock_nointr(mddev);
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
mddev_unlock(mddev);
- if (!mddev_is_clustered(mddev)) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
- }
+ if (!mddev_is_clustered(mddev))
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
spin_lock(&mddev->lock);
@@ -9567,18 +9552,15 @@ static int __init md_init(void)
if (!md_rdev_misc_wq)
goto err_rdev_misc_wq;
- if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
+ ret = __register_blkdev(MD_MAJOR, "md", md_probe);
+ if (ret < 0)
goto err_md;
- if ((ret = register_blkdev(0, "mdp")) < 0)
+ ret = __register_blkdev(0, "mdp", md_probe);
+ if (ret < 0)
goto err_mdp;
mdp_major = ret;
- blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
- md_probe, NULL, NULL);
- blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
- md_probe, NULL, NULL);
-
register_reboot_notifier(&md_notifier);
raid_table_header = register_sysctl_table(raid_root_table);
@@ -9662,8 +9644,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
}
}
- if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
- update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
+ if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
+ ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
+ if (ret)
+ pr_warn("md: updating array disks failed. %d\n", ret);
+ }
/*
* Since mddev->delta_disks has already updated in update_raid_disks,
@@ -9845,9 +9830,6 @@ static __exit void md_exit(void)
struct list_head *tmp;
int delay = 1;
- blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
- blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
-
unregister_blkdev(MD_MAJOR,"md");
unregister_blkdev(mdp_major, "mdp");
unregister_reboot_notifier(&md_notifier);