aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-08-30 11:24:04 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-08-30 11:24:04 -0700
commit: 86a1679860babbacd61fc1e8c0c0f43641d5860d (patch)
tree: cb09c62e0c86926e2efbafdbd3df333838c2823c /drivers/md
parent: Merge tag 'nfs-for-4.8-3' of git://git.linux-nfs.org/projects/trondmy/linux-nfs (diff)
parent: raid5: avoid unnecessary bio data set (diff)
download: linux-dev-86a1679860babbacd61fc1e8c0c0f43641d5860d.tar.xz
download: linux-dev-86a1679860babbacd61fc1e8c0c0f43641d5860d.zip
Merge tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD fixes from Shaohua Li: "This includes several bug fixes: - Alexey Obitotskiy fixed a hang for faulty raid5 array with external management - Song Liu fixed two raid5 journal related bugs - Tomasz Majchrzak fixed a bad block recording issue and an accounting issue for raid10 - ZhengYuan Liu fixed an accounting issue for raid5 - I fixed a potential race condition and memory leak with DIF/DIX enabled - other trivial fixes" * tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: raid5: avoid unnecessary bio data set raid5: fix memory leak of bio integrity data raid10: record correct address of bad block md-cluster: fix error return code in join() r5cache: set MD_JOURNAL_CLEAN correctly md: don't print the same repeated messages about delayed sync operation md: remove obsolete ret in md_start_sync md: do not count journal as spare in GET_ARRAY_INFO md: Prevent IO hold during accessing to faulty raid5 array MD: hold mddev lock to change bitmap location raid5: fix incorrectly counter of conf->empty_inactive_list_nr raid10: increment write counter after bio is split
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/bitmap.c 47
-rw-r--r-- drivers/md/md-cluster.c 12
-rw-r--r-- drivers/md/md.c 28
-rw-r--r-- drivers/md/raid10.c 13
-rw-r--r-- drivers/md/raid5.c 64
5 files changed, 107 insertions, 57 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 6fff794e0c72..13041ee37ad6 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page)
static ssize_t
location_store(struct mddev *mddev, const char *buf, size_t len)
{
+ int rv;
+ rv = mddev_lock(mddev);
+ if (rv)
+ return rv;
if (mddev->pers) {
- if (!mddev->pers->quiesce)
- return -EBUSY;
- if (mddev->recovery || mddev->sync_thread)
- return -EBUSY;
+ if (!mddev->pers->quiesce) {
+ rv = -EBUSY;
+ goto out;
+ }
+ if (mddev->recovery || mddev->sync_thread) {
+ rv = -EBUSY;
+ goto out;
+ }
}
if (mddev->bitmap || mddev->bitmap_info.file ||
mddev->bitmap_info.offset) {
/* bitmap already configured. Only option is to clear it */
- if (strncmp(buf, "none", 4) != 0)
- return -EBUSY;
+ if (strncmp(buf, "none", 4) != 0) {
+ rv = -EBUSY;
+ goto out;
+ }
if (mddev->pers) {
mddev->pers->quiesce(mddev, 1);
bitmap_destroy(mddev);
@@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
/* nothing to be done */;
else if (strncmp(buf, "file:", 5) == 0) {
/* Not supported yet */
- return -EINVAL;
+ rv = -EINVAL;
+ goto out;
} else {
- int rv;
if (buf[0] == '+')
rv = kstrtoll(buf+1, 10, &offset);
else
rv = kstrtoll(buf, 10, &offset);
if (rv)
- return rv;
- if (offset == 0)
- return -EINVAL;
+ goto out;
+ if (offset == 0) {
+ rv = -EINVAL;
+ goto out;
+ }
if (mddev->bitmap_info.external == 0 &&
mddev->major_version == 0 &&
- offset != mddev->bitmap_info.default_offset)
- return -EINVAL;
+ offset != mddev->bitmap_info.default_offset) {
+ rv = -EINVAL;
+ goto out;
+ }
mddev->bitmap_info.offset = offset;
if (mddev->pers) {
struct bitmap *bitmap;
@@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
mddev->pers->quiesce(mddev, 0);
if (rv) {
bitmap_destroy(mddev);
- return rv;
+ goto out;
}
}
}
@@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
}
+ rv = 0;
+out:
+ mddev_unlock(mddev);
+ if (rv)
+ return rv;
return len;
}
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 41573f1f626f..34a840d9df76 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -834,8 +834,10 @@ static int join(struct mddev *mddev, int nodes)
goto err;
}
cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
- if (!cinfo->ack_lockres)
+ if (!cinfo->ack_lockres) {
+ ret = -ENOMEM;
goto err;
+ }
/* get sync CR lock on ACK. */
if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
@@ -849,8 +851,10 @@ static int join(struct mddev *mddev, int nodes)
pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
- if (!cinfo->bitmap_lockres)
+ if (!cinfo->bitmap_lockres) {
+ ret = -ENOMEM;
goto err;
+ }
if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
pr_err("Failed to get bitmap lock\n");
ret = -EINVAL;
@@ -858,8 +862,10 @@ static int join(struct mddev *mddev, int nodes)
}
cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
- if (!cinfo->resync_lockres)
+ if (!cinfo->resync_lockres) {
+ ret = -ENOMEM;
goto err;
+ }
return 0;
err:
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d646f6e444f0..67642bacd597 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1604,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_chunk_sectors = mddev->chunk_sectors;
}
- if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
+ if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
set_bit(MD_HAS_JOURNAL, &mddev->flags);
- if (mddev->recovery_cp == MaxSector)
- set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
- }
} else if (mddev->pers == NULL) {
/* Insist of good event counter while assembling, except for
* spares (which don't need an event count) */
@@ -5851,6 +5848,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
working++;
if (test_bit(In_sync, &rdev->flags))
insync++;
+ else if (test_bit(Journal, &rdev->flags))
+ /* TODO: add journal count to md_u.h */
+ ;
else
spare++;
}
@@ -7862,6 +7862,7 @@ void md_do_sync(struct md_thread *thread)
*/
do {
+ int mddev2_minor = -1;
mddev->curr_resync = 2;
try_again:
@@ -7891,10 +7892,14 @@ void md_do_sync(struct md_thread *thread)
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev2->curr_resync >= mddev->curr_resync) {
- printk(KERN_INFO "md: delaying %s of %s"
- " until %s has finished (they"
- " share one or more physical units)\n",
- desc, mdname(mddev), mdname(mddev2));
+ if (mddev2_minor != mddev2->md_minor) {
+ mddev2_minor = mddev2->md_minor;
+ printk(KERN_INFO "md: delaying %s of %s"
+ " until %s has finished (they"
+ " share one or more physical units)\n",
+ desc, mdname(mddev),
+ mdname(mddev2));
+ }
mddev_put(mddev2);
if (signal_pending(current))
flush_signals(current);
@@ -8275,16 +8280,13 @@ no_add:
static void md_start_sync(struct work_struct *ws)
{
struct mddev *mddev = container_of(ws, struct mddev, del_work);
- int ret = 0;
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
"resync");
if (!mddev->sync_thread) {
- if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
- printk(KERN_ERR "%s: could not start resync"
- " thread...\n",
- mdname(mddev));
+ printk(KERN_ERR "%s: could not start resync thread...\n",
+ mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0e4efcd10795..be1a9fca3b2d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1064,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
int max_sectors;
int sectors;
+ md_write_start(mddev, bio);
+
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
@@ -1445,8 +1447,6 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
return;
}
- md_write_start(mddev, bio);
-
do {
/*
@@ -2465,20 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
while (sect_to_write) {
struct bio *wbio;
+ sector_t wsector;
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors' */
wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
- wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
- choose_data_offset(r10_bio, rdev) +
- (sector - r10_bio->sector));
+ wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
+ wbio->bi_iter.bi_sector = wsector +
+ choose_data_offset(r10_bio, rdev);
wbio->bi_bdev = rdev->bdev;
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
if (submit_bio_wait(wbio) < 0)
/* Failure! */
- ok = rdev_set_badblocks(rdev, sector,
+ ok = rdev_set_badblocks(rdev, wsector,
sectors, 0)
&& ok;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8912407a4dd0..da583bb43c84 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
{
struct stripe_head *sh;
int hash = stripe_hash_locks_hash(sector);
+ int inc_empty_inactive_list_flag;
pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
@@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&sh->lru) &&
!test_bit(STRIPE_EXPANDING, &sh->state));
+ inc_empty_inactive_list_flag = 0;
+ if (!list_empty(conf->inactive_list + hash))
+ inc_empty_inactive_list_flag = 1;
list_del_init(&sh->lru);
+ if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+ atomic_inc(&conf->empty_inactive_list_nr);
if (sh->group) {
sh->group->stripes_cnt--;
sh->group = NULL;
@@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
sector_t head_sector, tmp_sec;
int hash;
int dd_idx;
+ int inc_empty_inactive_list_flag;
/* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
tmp_sec = sh->sector;
@@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&head->lru) &&
!test_bit(STRIPE_EXPANDING, &head->state));
+ inc_empty_inactive_list_flag = 0;
+ if (!list_empty(conf->inactive_list + hash))
+ inc_empty_inactive_list_flag = 1;
list_del_init(&head->lru);
+ if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+ atomic_inc(&conf->empty_inactive_list_nr);
if (head->group) {
head->group->stripes_cnt--;
head->group = NULL;
@@ -993,7 +1005,6 @@ again:
set_bit(STRIPE_IO_STARTED, &sh->state);
- bio_reset(bi);
bi->bi_bdev = rdev->bdev;
bio_set_op_attrs(bi, op, op_flags);
bi->bi_end_io = op_is_write(op)
@@ -1045,7 +1056,6 @@ again:
set_bit(STRIPE_IO_STARTED, &sh->state);
- bio_reset(rbi);
rbi->bi_bdev = rrdev->bdev;
bio_set_op_attrs(rbi, op, op_flags);
BUG_ON(!op_is_write(op));
@@ -1978,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
put_cpu();
}
-static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
+static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
+ int disks)
{
struct stripe_head *sh;
+ int i;
sh = kmem_cache_zalloc(sc, gfp);
if (sh) {
@@ -1989,6 +2001,17 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
INIT_LIST_HEAD(&sh->batch_list);
INIT_LIST_HEAD(&sh->lru);
atomic_set(&sh->count, 1);
+ for (i = 0; i < disks; i++) {
+ struct r5dev *dev = &sh->dev[i];
+
+ bio_init(&dev->req);
+ dev->req.bi_io_vec = &dev->vec;
+ dev->req.bi_max_vecs = 1;
+
+ bio_init(&dev->rreq);
+ dev->rreq.bi_io_vec = &dev->rvec;
+ dev->rreq.bi_max_vecs = 1;
+ }
}
return sh;
}
@@ -1996,7 +2019,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
{
struct stripe_head *sh;
- sh = alloc_stripe(conf->slab_cache, gfp);
+ sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
if (!sh)
return 0;
@@ -2167,7 +2190,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
mutex_lock(&conf->cache_size_mutex);
for (i = conf->max_nr_stripes; i; i--) {
- nsh = alloc_stripe(sc, GFP_KERNEL);
+ nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
if (!nsh)
break;
@@ -2299,6 +2322,7 @@ static void raid5_end_read_request(struct bio * bi)
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
bi->bi_error);
if (i == disks) {
+ bio_reset(bi);
BUG();
return;
}
@@ -2402,6 +2426,7 @@ static void raid5_end_read_request(struct bio * bi)
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state);
raid5_release_stripe(sh);
+ bio_reset(bi);
}
static void raid5_end_write_request(struct bio *bi)
@@ -2436,6 +2461,7 @@ static void raid5_end_write_request(struct bio *bi)
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
bi->bi_error);
if (i == disks) {
+ bio_reset(bi);
BUG();
return;
}
@@ -2479,22 +2505,13 @@ static void raid5_end_write_request(struct bio *bi)
if (sh->batch_head && sh != sh->batch_head)
raid5_release_stripe(sh->batch_head);
+ bio_reset(bi);
}
static void raid5_build_block(struct stripe_head *sh, int i, int previous)
{
struct r5dev *dev = &sh->dev[i];
- bio_init(&dev->req);
- dev->req.bi_io_vec = &dev->vec;
- dev->req.bi_max_vecs = 1;
- dev->req.bi_private = sh;
-
- bio_init(&dev->rreq);
- dev->rreq.bi_io_vec = &dev->rvec;
- dev->rreq.bi_max_vecs = 1;
- dev->rreq.bi_private = sh;
-
dev->flags = 0;
dev->sector = raid5_compute_blocknr(sh, i, previous);
}
@@ -4628,7 +4645,9 @@ finish:
}
if (!bio_list_empty(&s.return_bi)) {
- if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
+ if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
+ (s.failed <= conf->max_degraded ||
+ conf->mddev->external == 0)) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->return_bi, &s.return_bi);
spin_unlock_irq(&conf->device_lock);
@@ -6826,11 +6845,14 @@ static int raid5_run(struct mddev *mddev)
if (IS_ERR(conf))
return PTR_ERR(conf);
- if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
- printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
- mdname(mddev));
- mddev->ro = 1;
- set_disk_ro(mddev->gendisk, 1);
+ if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+ if (!journal_dev) {
+ pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
+ mdname(mddev));
+ mddev->ro = 1;
+ set_disk_ro(mddev->gendisk, 1);
+ } else if (mddev->recovery_cp == MaxSector)
+ set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
}
conf->min_offset_diff = min_offset_diff;