Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-crypt.c           27
-rw-r--r--  drivers/md/dm-rq.c               2
-rw-r--r--  drivers/md/dm-thin-metadata.c    4
-rw-r--r--  drivers/md/dm-thin-metadata.h    2
-rw-r--r--  drivers/md/dm-thin.c            65
-rw-r--r--  drivers/md/dm.c                 41
-rw-r--r--  drivers/md/md.c                  7
-rw-r--r--  drivers/md/raid1.c              28
-rw-r--r--  drivers/md/raid5-cache.c        33
-rw-r--r--  drivers/md/raid5.c               8
10 files changed, 156 insertions, 61 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 0ff22159a0ca..dd538e6b2748 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -932,7 +932,7 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio)
if (IS_ERR(bip))
return PTR_ERR(bip);
- tag_len = io->cc->on_disk_tag_size * bio_sectors(bio);
+ tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift);
bip->bip_iter.bi_size = tag_len;
bip->bip_iter.bi_sector = io->cc->start + io->sector;
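This hunk stops over-allocating the integrity tag space: cc->on_disk_tag_size is per crypt sector, while bio_sectors() counts 512-byte sectors, so with crypt sectors larger than 512 bytes the product has to be scaled down by cc->sector_shift. A stand-alone sketch of the arithmetic (the sector size and tag size below are made up for illustration):

#include <stdio.h>

/*
 * Arithmetic for the tag_len fix above, with made-up values: 4096-byte
 * crypt sectors (sector_shift == 3, i.e. 8 x 512-byte sectors each) and a
 * 16-byte per-sector integrity tag.  bio_sectors() counts 512-byte sectors,
 * so it must be scaled down before multiplying by the tag size.
 */
int main(void)
{
	unsigned int on_disk_tag_size = 16;
	unsigned int sector_shift = 3;		/* 4096-byte crypt sectors */
	unsigned int bio_sectors = 64;		/* a 32 KiB bio            */

	unsigned int before = on_disk_tag_size * bio_sectors;
	unsigned int after  = on_disk_tag_size * (bio_sectors >> sector_shift);

	printf("tag_len before the fix: %u bytes, after: %u bytes\n",
	       before, after);
	return 0;
}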
@@ -2414,9 +2414,21 @@ static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key
* capi:cipher_api_spec-iv:ivopts
*/
tmp = &cipher_in[strlen("capi:")];
- cipher_api = strsep(&tmp, "-");
- *ivmode = strsep(&tmp, ":");
- *ivopts = tmp;
+
+ /* Separate IV options if present; the hash name can contain another '-' */
+ *ivopts = strrchr(tmp, ':');
+ if (*ivopts) {
+ **ivopts = '\0';
+ (*ivopts)++;
+ }
+ /* Parse IV mode */
+ *ivmode = strrchr(tmp, '-');
+ if (*ivmode) {
+ **ivmode = '\0';
+ (*ivmode)++;
+ }
+ /* The rest is crypto API spec */
+ cipher_api = tmp;
if (*ivmode && !strcmp(*ivmode, "lmk"))
cc->tfms_count = 64;
@@ -2486,11 +2498,8 @@ static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key
goto bad_mem;
chainmode = strsep(&tmp, "-");
- *ivopts = strsep(&tmp, "-");
- *ivmode = strsep(&*ivopts, ":");
-
- if (tmp)
- DMWARN("Ignoring unexpected additional cipher options");
+ *ivmode = strsep(&tmp, ":");
+ *ivopts = tmp;
/*
* For compatibility with the original dm-crypt mapping format, if
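The capi: parser above switches from strsep() (which splits at the first separator) to strrchr() (which splits at the last one), so a '-' inside the cipher API spec or the IV hash name no longer truncates the parse: the IV options are peeled off at the rightmost ':' before the IV mode is looked up. A minimal userspace sketch of the same splitting order; parse_capi() and the sample strings are illustrative, not the dm-crypt interface:

#include <stdio.h>
#include <string.h>

/*
 * Stand-alone sketch of the strrchr()-based "capi:" parsing in
 * crypt_ctr_cipher_new() above.  The kernel code writes the results back
 * through the ivmode/ivopts pointers supplied by the constructor.
 */
static void parse_capi(char *tmp)
{
	char *cipher_api, *ivmode = NULL, *ivopts = NULL;

	/* IV options, if any, follow the last ':' (the hash may contain '-') */
	ivopts = strrchr(tmp, ':');
	if (ivopts)
		*ivopts++ = '\0';

	/* IV mode follows the last remaining '-' */
	ivmode = strrchr(tmp, '-');
	if (ivmode)
		*ivmode++ = '\0';

	cipher_api = tmp;	/* whatever is left is the crypto API spec */
	printf("api=%-24s ivmode=%-8s ivopts=%s\n", cipher_api,
	       ivmode ? ivmode : "(none)", ivopts ? ivopts : "(none)");
}

int main(void)
{
	char adiantum[] = "xchacha12,aes-adiantum-plain64";
	char essiv[]    = "xts(aes)-essiv:sha256";

	parse_capi(adiantum);	/* api=xchacha12,aes-adiantum ivmode=plain64 */
	parse_capi(essiv);	/* api=xts(aes) ivmode=essiv ivopts=sha256   */
	return 0;
}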
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 4eb5f8c56535..a20531e5f3b4 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -131,7 +131,7 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
static void rq_completed(struct mapped_device *md)
{
/* nudge anyone waiting on suspend queue */
- if (unlikely(waitqueue_active(&md->wait)))
+ if (unlikely(wq_has_sleeper(&md->wait)))
wake_up(&md->wait);
/*
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 20b0776e39ef..ed3caceaed07 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1678,7 +1678,7 @@ int dm_thin_remove_range(struct dm_thin_device *td,
return r;
}
-int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
+int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
{
int r;
uint32_t ref_count;
@@ -1686,7 +1686,7 @@ int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *resu
down_read(&pmd->root_lock);
r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
if (!r)
- *result = (ref_count != 0);
+ *result = (ref_count > 1);
up_read(&pmd->root_lock);
return r;
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index 35e954ea20a9..f6be0d733c20 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -195,7 +195,7 @@ int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);
-int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result);
+int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result);
int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e);
int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e);
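The rename from dm_pool_block_is_used() to dm_pool_block_is_shared() carries a semantic change: a data block with a reference count of exactly 1 is mapped only by the thin device issuing the discard, so the discard can still be passed down; only ref_count > 1 means another device or snapshot shares the block. A tiny illustrative comparison of the two predicates (the helpers below are stand-ins, not the metadata API, which reads the count from the pool's data space map under root_lock):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the predicate change above. */
static bool block_is_used(uint32_t ref_count)   { return ref_count != 0; }
static bool block_is_shared(uint32_t ref_count) { return ref_count > 1; }

int main(void)
{
	for (uint32_t ref_count = 0; ref_count <= 2; ref_count++)
		printf("ref_count=%u used=%d shared=%d\n", ref_count,
		       block_is_used(ref_count), block_is_shared(ref_count));
	/* ref_count == 1: "used" but not "shared", so the discard passdown in
	 * passdown_double_checking_shared_status() may proceed. */
	return 0;
}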
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index dadd9696340c..e83b63608262 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -257,6 +257,7 @@ struct pool {
spinlock_t lock;
struct bio_list deferred_flush_bios;
+ struct bio_list deferred_flush_completions;
struct list_head prepared_mappings;
struct list_head prepared_discards;
struct list_head prepared_discards_pt2;
@@ -956,6 +957,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
mempool_free(m, &m->tc->pool->mapping_pool);
}
+static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
+{
+ struct pool *pool = tc->pool;
+ unsigned long flags;
+
+ /*
+ * If the bio has the REQ_FUA flag set we must commit the metadata
+ * before signaling its completion.
+ */
+ if (!bio_triggers_commit(tc, bio)) {
+ bio_endio(bio);
+ return;
+ }
+
+ /*
+ * Complete bio with an error if earlier I/O caused changes to the
+ * metadata that can't be committed, e.g., due to I/O errors on the
+ * metadata device.
+ */
+ if (dm_thin_aborted_changes(tc->td)) {
+ bio_io_error(bio);
+ return;
+ }
+
+ /*
+ * Batch together any bios that trigger commits and then issue a
+ * single commit for them in process_deferred_bios().
+ */
+ spin_lock_irqsave(&pool->lock, flags);
+ bio_list_add(&pool->deferred_flush_completions, bio);
+ spin_unlock_irqrestore(&pool->lock, flags);
+}
+
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
struct thin_c *tc = m->tc;
@@ -988,7 +1022,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
*/
if (bio) {
inc_remap_and_issue_cell(tc, m->cell, m->data_block);
- bio_endio(bio);
+ complete_overwrite_bio(tc, bio);
} else {
inc_all_io_entry(tc->pool, m->cell->holder);
remap_and_issue(tc, m->cell->holder, m->data_block);
@@ -1048,7 +1082,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
* passdown we have to check that these blocks are now unused.
*/
int r = 0;
- bool used = true;
+ bool shared = true;
struct thin_c *tc = m->tc;
struct pool *pool = tc->pool;
dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
@@ -1058,11 +1092,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
while (b != end) {
/* find start of unmapped run */
for (; b < end; b++) {
- r = dm_pool_block_is_used(pool->pmd, b, &used);
+ r = dm_pool_block_is_shared(pool->pmd, b, &shared);
if (r)
goto out;
- if (!used)
+ if (!shared)
break;
}
@@ -1071,11 +1105,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
/* find end of run */
for (e = b + 1; e != end; e++) {
- r = dm_pool_block_is_used(pool->pmd, e, &used);
+ r = dm_pool_block_is_shared(pool->pmd, e, &shared);
if (r)
goto out;
- if (used)
+ if (shared)
break;
}
@@ -2317,7 +2351,7 @@ static void process_deferred_bios(struct pool *pool)
{
unsigned long flags;
struct bio *bio;
- struct bio_list bios;
+ struct bio_list bios, bio_completions;
struct thin_c *tc;
tc = get_first_thin(pool);
@@ -2328,26 +2362,36 @@ static void process_deferred_bios(struct pool *pool)
}
/*
- * If there are any deferred flush bios, we must commit
- * the metadata before issuing them.
+ * If there are any deferred flush bios, we must commit the metadata
+ * before issuing them or signaling their completion.
*/
bio_list_init(&bios);
+ bio_list_init(&bio_completions);
+
spin_lock_irqsave(&pool->lock, flags);
bio_list_merge(&bios, &pool->deferred_flush_bios);
bio_list_init(&pool->deferred_flush_bios);
+
+ bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
+ bio_list_init(&pool->deferred_flush_completions);
spin_unlock_irqrestore(&pool->lock, flags);
- if (bio_list_empty(&bios) &&
+ if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
!(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
return;
if (commit(pool)) {
+ bio_list_merge(&bios, &bio_completions);
+
while ((bio = bio_list_pop(&bios)))
bio_io_error(bio);
return;
}
pool->last_commit_jiffies = jiffies;
+ while ((bio = bio_list_pop(&bio_completions)))
+ bio_endio(bio);
+
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
@@ -2954,6 +2998,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
spin_lock_init(&pool->lock);
bio_list_init(&pool->deferred_flush_bios);
+ bio_list_init(&pool->deferred_flush_completions);
INIT_LIST_HEAD(&pool->prepared_mappings);
INIT_LIST_HEAD(&pool->prepared_discards);
INIT_LIST_HEAD(&pool->prepared_discards_pt2);
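The new deferred_flush_completions list exists because a REQ_FUA overwrite has already reached its data block by the time process_prepared_mapping() runs; all that remains is to signal completion, but only after the metadata commit. process_deferred_bios() therefore drains two lists after a successful commit (completions are ended, flush bios are issued) and errors both if the commit fails. A toy model of that ordering; plain string arrays stand in for the kernel's bio_list, and nothing here is DM API:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the two-list drain in process_deferred_bios() above. */
static void drain(const char *action, const char **bios, int n)
{
	for (int i = 0; i < n; i++)
		printf("%s %s\n", action, bios[i]);
}

static void process_deferred(const char **flush_bios, int nflush,
			     const char **completions, int ncompl,
			     bool commit_ok)
{
	if (!commit_ok) {
		/* commit failed: everything that was batched gets an error */
		drain("error", flush_bios, nflush);
		drain("error", completions, ncompl);
		return;
	}
	/* commit succeeded: signal FUA overwrites, then issue flush bios */
	drain("endio", completions, ncompl);
	drain("issue", flush_bios, nflush);
}

int main(void)
{
	const char *flush_bios[]  = { "deferred-flush-bio" };
	const char *completions[] = { "fua-overwrite-bio" };

	process_deferred(flush_bios, 1, completions, 1, true);
	return 0;
}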
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d67c95ef8d7e..515e6af9bed2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -699,7 +699,7 @@ static void end_io_acct(struct dm_io *io)
true, duration, &io->stats_aux);
/* nudge anyone waiting on suspend queue */
- if (unlikely(waitqueue_active(&md->wait)))
+ if (unlikely(wq_has_sleeper(&md->wait)))
wake_up(&md->wait);
}
@@ -1320,7 +1320,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
__bio_clone_fast(clone, bio);
- if (unlikely(bio_integrity(bio) != NULL)) {
+ if (bio_integrity(bio)) {
int r;
if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
@@ -1339,7 +1339,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
clone->bi_iter.bi_size = to_bytes(len);
- if (unlikely(bio_integrity(bio) != NULL))
+ if (bio_integrity(bio))
bio_integrity_trim(clone);
return 0;
@@ -1588,6 +1588,9 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
ci->sector = bio->bi_iter.bi_sector;
}
+#define __dm_part_stat_sub(part, field, subnd) \
+ (part_stat_get(part, field) -= (subnd))
+
/*
* Entry point to split a bio into clones and submit them to the targets.
*/
@@ -1642,7 +1645,21 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
GFP_NOIO, &md->queue->bio_split);
ci.io->orig_bio = b;
+
+ /*
+ * Adjust IO stats for each split, otherwise upon queue
+ * reentry there will be redundant IO accounting.
+ * NOTE: this is a stop-gap fix, a proper fix involves
+ * significant refactoring of DM core's bio splitting
+ * (by eliminating DM's splitting and just using bio_split)
+ */
+ part_stat_lock();
+ __dm_part_stat_sub(&dm_disk(md)->part0,
+ sectors[op_stat_group(bio_op(bio))], ci.sector_count);
+ part_stat_unlock();
+
bio_chain(b, bio);
+ trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
ret = generic_make_request(bio);
break;
}
@@ -1713,6 +1730,15 @@ out:
return ret;
}
+static blk_qc_t dm_process_bio(struct mapped_device *md,
+ struct dm_table *map, struct bio *bio)
+{
+ if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
+ return __process_bio(md, map, bio);
+ else
+ return __split_and_process_bio(md, map, bio);
+}
+
static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
{
struct mapped_device *md = q->queuedata;
@@ -1733,10 +1759,7 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
return ret;
}
- if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
- ret = __process_bio(md, map, bio);
- else
- ret = __split_and_process_bio(md, map, bio);
+ ret = dm_process_bio(md, map, bio);
dm_put_live_table(md, srcu_idx);
return ret;
@@ -2415,9 +2438,9 @@ static void dm_wq_work(struct work_struct *work)
break;
if (dm_request_based(md))
- generic_make_request(c);
+ (void) generic_make_request(c);
else
- __split_and_process_bio(md, map, c);
+ (void) dm_process_bio(md, map, c);
}
dm_put_live_table(md, srcu_idx);
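The __dm_part_stat_sub() adjustment addresses the fact that the whole original bio is accounted on first entry; when DM splits off the front and resubmits the remainder with generic_make_request(), that remainder is accounted again on re-entry. Subtracting the not-yet-processed sectors before the resubmit keeps the totals consistent. The arithmetic, with made-up numbers:

#include <stdio.h>

/*
 * Toy arithmetic for the split-accounting fix above.  The 1024/256 sector
 * split is made up; the point is that the remainder would otherwise be
 * counted twice once it re-enters dm_make_request().
 */
int main(void)
{
	unsigned int submitted = 1024;	/* sectors in the original bio          */
	unsigned int remainder = 256;	/* ci.sector_count left after the split */
	unsigned int accounted = 0;

	accounted += submitted;		/* accounted in full on first entry       */
	accounted -= remainder;		/* __dm_part_stat_sub() before resubmit   */
	accounted += remainder;		/* re-entry accounts the remainder again  */

	printf("accounted %u sectors for %u submitted\n", accounted, submitted);
	return 0;
}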
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fd4af4de03b4..05ffffb8b769 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -207,15 +207,10 @@ static bool create_on_open = true;
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
struct mddev *mddev)
{
- struct bio *b;
-
if (!mddev || !bioset_initialized(&mddev->bio_set))
return bio_alloc(gfp_mask, nr_iovecs);
- b = bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
- if (!b)
- return NULL;
- return b;
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 1d54109071cc..fa47249fa3e4 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1863,6 +1863,20 @@ static void end_sync_read(struct bio *bio)
reschedule_retry(r1_bio);
}
+static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio)
+{
+ sector_t sync_blocks = 0;
+ sector_t s = r1_bio->sector;
+ long sectors_to_go = r1_bio->sectors;
+
+ /* make sure these bits don't get cleared. */
+ do {
+ md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1);
+ s += sync_blocks;
+ sectors_to_go -= sync_blocks;
+ } while (sectors_to_go > 0);
+}
+
static void end_sync_write(struct bio *bio)
{
int uptodate = !bio->bi_status;
@@ -1874,15 +1888,7 @@ static void end_sync_write(struct bio *bio)
struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev;
if (!uptodate) {
- sector_t sync_blocks = 0;
- sector_t s = r1_bio->sector;
- long sectors_to_go = r1_bio->sectors;
- /* make sure these bits doesn't get cleared. */
- do {
- md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1);
- s += sync_blocks;
- sectors_to_go -= sync_blocks;
- } while (sectors_to_go > 0);
+ abort_sync_write(mddev, r1_bio);
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
set_bit(MD_RECOVERY_NEEDED, &
@@ -2172,8 +2178,10 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
(i == r1_bio->read_disk ||
!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
continue;
- if (test_bit(Faulty, &conf->mirrors[i].rdev->flags))
+ if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) {
+ abort_sync_write(mddev, r1_bio);
continue;
+ }
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
if (test_bit(FailFast, &conf->mirrors[i].rdev->flags))
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index ec3a5ef7fee0..cbbe6b6535be 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1935,12 +1935,14 @@ out:
}
static struct stripe_head *
-r5c_recovery_alloc_stripe(struct r5conf *conf,
- sector_t stripe_sect)
+r5c_recovery_alloc_stripe(
+ struct r5conf *conf,
+ sector_t stripe_sect,
+ int noblock)
{
struct stripe_head *sh;
- sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0);
+ sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
if (!sh)
return NULL; /* no more stripe available */
@@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
stripe_sect);
if (!sh) {
- sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
+ sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1);
/*
* cannot get stripe from raid5_get_active_stripe
* try replay some stripes
@@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
r5c_recovery_replay_stripes(
cached_stripe_list, ctx);
sh = r5c_recovery_alloc_stripe(
- conf, stripe_sect);
+ conf, stripe_sect, 1);
}
if (!sh) {
+ int new_size = conf->min_nr_stripes * 2;
pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
mdname(mddev),
- conf->min_nr_stripes * 2);
- raid5_set_cache_size(mddev,
- conf->min_nr_stripes * 2);
- sh = r5c_recovery_alloc_stripe(conf,
- stripe_sect);
+ new_size);
+ ret = raid5_set_cache_size(mddev, new_size);
+ if (conf->min_nr_stripes <= new_size / 2) {
+ pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n",
+ mdname(mddev),
+ ret,
+ new_size,
+ conf->min_nr_stripes,
+ conf->max_nr_stripes);
+ return -ENOMEM;
+ }
+ sh = r5c_recovery_alloc_stripe(
+ conf, stripe_sect, 0);
}
if (!sh) {
pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
- mdname(mddev));
+ mdname(mddev));
return -ENOMEM;
}
list_add_tail(&sh->lru, cached_stripe_list);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4990f0319f6c..cecea901ab8c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
int
raid5_set_cache_size(struct mddev *mddev, int size)
{
+ int result = 0;
struct r5conf *conf = mddev->private;
if (size <= 16 || size > 32768)
@@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size)
mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
- if (!grow_one_stripe(conf, GFP_KERNEL))
+ if (!grow_one_stripe(conf, GFP_KERNEL)) {
+ conf->min_nr_stripes = conf->max_nr_stripes;
+ result = -ENOMEM;
break;
+ }
mutex_unlock(&conf->cache_size_mutex);
- return 0;
+ return result;
}
EXPORT_SYMBOL(raid5_set_cache_size);
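raid5_set_cache_size() previously returned 0 even when grow_one_stripe() ran out of memory partway; it now clamps min_nr_stripes to what was actually allocated and returns -ENOMEM, and the journal-recovery caller in raid5-cache.c above only continues if the cache grew beyond its old minimum (min_nr_stripes > new_size / 2), retrying the stripe allocation in blocking mode. A toy model of that interaction; the struct and the "reachable" limit are made up to stand in for memory pressure:

#include <stdio.h>

/*
 * Toy model of the raid5_set_cache_size() / recovery interaction above.
 * "reachable" fakes the point at which grow_one_stripe() starts failing;
 * the struct only mirrors the two r5conf fields involved.
 */
struct toyconf {
	int min_nr_stripes;
	int max_nr_stripes;
};

static int set_cache_size(struct toyconf *conf, int size, int reachable)
{
	int result = 0;

	conf->min_nr_stripes = size;
	while (size > conf->max_nr_stripes) {
		if (conf->max_nr_stripes >= reachable) {
			/* grow_one_stripe() failed: remember how far we got */
			conf->min_nr_stripes = conf->max_nr_stripes;
			result = -1;	/* stands in for -ENOMEM */
			break;
		}
		conf->max_nr_stripes++;
	}
	return result;
}

int main(void)
{
	struct toyconf conf = { .min_nr_stripes = 256, .max_nr_stripes = 256 };
	int new_size = conf.min_nr_stripes * 2;
	int ret = set_cache_size(&conf, new_size, 300);	/* memory runs out at 300 */

	if (conf.min_nr_stripes <= new_size / 2)
		printf("recovery gives up: ret=%d, cache stuck at %d stripes\n",
		       ret, conf.min_nr_stripes);
	else
		printf("cache reached %d stripes (ret=%d); recovery retries with a blocking get\n",
		       conf.min_nr_stripes, ret);
	return 0;
}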