diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Makefile | 3 | ||||
-rw-r--r-- | drivers/md/dm-bufio.c | 60 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 161 | ||||
-rw-r--r-- | drivers/md/dm-dust.c | 58 | ||||
-rw-r--r-- | drivers/md/dm-ebs-target.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-init.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-integrity.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 146 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-rq.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-verity-target.c | 13 | ||||
-rw-r--r-- | drivers/md/dm-verity-verify-sig.h | 14 | ||||
-rw-r--r-- | drivers/md/dm-verity.h | 3 | ||||
-rw-r--r-- | drivers/md/dm-writecache.c | 2 | ||||
-rw-r--r-- | drivers/md/dm.c | 3 | ||||
-rw-r--r-- | drivers/md/md-autodetect.c | 291 | ||||
-rw-r--r-- | drivers/md/md.c | 38 | ||||
-rw-r--r-- | drivers/md/md.h | 12 | ||||
-rw-r--r-- | drivers/md/raid5.c | 2 | ||||
-rw-r--r-- | drivers/md/raid5.h | 2 |
21 files changed, 665 insertions, 160 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 31840f95cd40..6d3e234dc46a 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -43,6 +43,9 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_MD_CLUSTER) += md-cluster.o obj-$(CONFIG_BCACHE) += bcache/ obj-$(CONFIG_BLK_DEV_MD) += md-mod.o +ifeq ($(CONFIG_BLK_DEV_MD),y) +obj-y += md-autodetect.o +endif obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o obj-$(CONFIG_DM_UNSTRIPED) += dm-unstripe.o diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 6d1565021d74..9c1a86bde658 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -108,7 +108,10 @@ struct dm_bufio_client { int async_write_error; struct list_head client_list; + struct shrinker shrinker; + struct work_struct shrink_work; + atomic_long_t need_shrink; }; /* @@ -1634,8 +1637,7 @@ static unsigned long get_retain_buffers(struct dm_bufio_client *c) return retain_bytes; } -static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, - gfp_t gfp_mask) +static void __scan(struct dm_bufio_client *c) { int l; struct dm_buffer *b, *tmp; @@ -1646,42 +1648,58 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, for (l = 0; l < LIST_SIZE; l++) { list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { - if (__try_evict_buffer(b, gfp_mask)) + if (count - freed <= retain_target) + atomic_long_set(&c->need_shrink, 0); + if (!atomic_long_read(&c->need_shrink)) + return; + if (__try_evict_buffer(b, GFP_KERNEL)) { + atomic_long_dec(&c->need_shrink); freed++; - if (!--nr_to_scan || ((count - freed) <= retain_target)) - return freed; + } cond_resched(); } } - return freed; } -static unsigned long -dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) +static void shrink_work(struct work_struct *w) +{ + struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work); + + dm_bufio_lock(c); + __scan(c); + dm_bufio_unlock(c); +} + +static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c; - unsigned long freed; c = container_of(shrink, struct dm_bufio_client, shrinker); - if (sc->gfp_mask & __GFP_FS) - dm_bufio_lock(c); - else if (!dm_bufio_trylock(c)) - return SHRINK_STOP; + atomic_long_add(sc->nr_to_scan, &c->need_shrink); + queue_work(dm_bufio_wq, &c->shrink_work); - freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); - dm_bufio_unlock(c); - return freed; + return sc->nr_to_scan; } -static unsigned long -dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) + READ_ONCE(c->n_buffers[LIST_DIRTY]); unsigned long retain_target = get_retain_buffers(c); + unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink); + + if (unlikely(count < retain_target)) + count = 0; + else + count -= retain_target; - return (count < retain_target) ? 0 : (count - retain_target); + if (unlikely(count < queued_for_cleanup)) + count = 0; + else + count -= queued_for_cleanup; + + return count; } /* @@ -1772,6 +1790,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign __free_buffer_wake(b); } + INIT_WORK(&c->shrink_work, shrink_work); + atomic_long_set(&c->need_shrink, 0); + c->shrinker.count_objects = dm_bufio_shrink_count; c->shrinker.scan_objects = dm_bufio_shrink_scan; c->shrinker.seeks = 1; @@ -1817,6 +1838,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) drop_buffers(c); unregister_shrinker(&c->shrinker); + flush_work(&c->shrink_work); mutex_lock(&dm_bufio_clients_lock); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b437a14c4942..148960721254 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -69,6 +69,7 @@ struct dm_crypt_io { u8 *integrity_metadata; bool integrity_metadata_from_pool; struct work_struct work; + struct tasklet_struct tasklet; struct convert_context ctx; @@ -127,7 +128,9 @@ struct iv_elephant_private { * and encrypts / decrypts at the same time. */ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, - DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; + DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD, + DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE, + DM_CRYPT_WRITE_INLINE }; enum cipher_flags { CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cihper */ @@ -407,7 +410,7 @@ static void crypt_iv_lmk_dtr(struct crypt_config *cc) crypto_free_shash(lmk->hash_tfm); lmk->hash_tfm = NULL; - kzfree(lmk->seed); + kfree_sensitive(lmk->seed); lmk->seed = NULL; } @@ -558,9 +561,9 @@ static void crypt_iv_tcw_dtr(struct crypt_config *cc) { struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; - kzfree(tcw->iv_seed); + kfree_sensitive(tcw->iv_seed); tcw->iv_seed = NULL; - kzfree(tcw->whitening); + kfree_sensitive(tcw->whitening); tcw->whitening = NULL; if (tcw->crc32_tfm && !IS_ERR(tcw->crc32_tfm)) @@ -994,8 +997,8 @@ static int crypt_iv_elephant(struct crypt_config *cc, struct dm_crypt_request *d kunmap_atomic(data); out: - kzfree(ks); - kzfree(es); + kfree_sensitive(ks); + kfree_sensitive(es); skcipher_request_free(req); return r; } @@ -1523,7 +1526,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_ * Encrypt / decrypt data from one bio to another one (can be the same one) */ static blk_status_t crypt_convert(struct crypt_config *cc, - struct convert_context *ctx) + struct convert_context *ctx, bool atomic) { unsigned int tag_offset = 0; unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT; @@ -1566,7 +1569,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc, atomic_dec(&ctx->cc_pending); ctx->cc_sector += sector_step; tag_offset++; - cond_resched(); + if (!atomic) + cond_resched(); continue; /* * There was a data integrity error. @@ -1892,7 +1896,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) clone->bi_iter.bi_sector = cc->start + io->sector; - if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) { + if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) || + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) { submit_bio_noacct(clone); return; } @@ -1915,9 +1920,32 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) spin_unlock_irqrestore(&cc->write_thread_lock, flags); } +static bool kcryptd_crypt_write_inline(struct crypt_config *cc, + struct convert_context *ctx) + +{ + if (!test_bit(DM_CRYPT_WRITE_INLINE, &cc->flags)) + return false; + + /* + * Note: zone append writes (REQ_OP_ZONE_APPEND) do not have ordering + * constraints so they do not need to be issued inline by + * kcryptd_crypt_write_convert(). + */ + switch (bio_op(ctx->bio_in)) { + case REQ_OP_WRITE: + case REQ_OP_WRITE_SAME: + case REQ_OP_WRITE_ZEROES: + return true; + default: + return false; + } +} + static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; + struct convert_context *ctx = &io->ctx; struct bio *clone; int crypt_finished; sector_t sector = io->sector; @@ -1927,7 +1955,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * Prevent io from disappearing until this function completes. */ crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); + crypt_convert_init(cc, ctx, NULL, io->base_bio, sector); clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); if (unlikely(!clone)) { @@ -1941,10 +1969,16 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) sector += bio_sectors(clone); crypt_inc_pending(io); - r = crypt_convert(cc, &io->ctx); + r = crypt_convert(cc, ctx, + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)); if (r) io->error = r; - crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); + crypt_finished = atomic_dec_and_test(&ctx->cc_pending); + if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) { + /* Wait for completion signaled by kcryptd_async_done() */ + wait_for_completion(&ctx->restart); + crypt_finished = 1; + } /* Encryption was already finished, submit io now */ if (crypt_finished) { @@ -1971,7 +2005,8 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, io->sector); - r = crypt_convert(cc, &io->ctx); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)); if (r) io->error = r; @@ -2015,10 +2050,21 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (!atomic_dec_and_test(&ctx->cc_pending)) return; - if (bio_data_dir(io->base_bio) == READ) + /* + * The request is fully completed: for inline writes, let + * kcryptd_crypt_write_convert() do the IO submission. + */ + if (bio_data_dir(io->base_bio) == READ) { kcryptd_crypt_read_done(io); - else - kcryptd_crypt_write_io_submit(io, 1); + return; + } + + if (kcryptd_crypt_write_inline(cc, ctx)) { + complete(&ctx->restart); + return; + } + + kcryptd_crypt_write_io_submit(io, 1); } static void kcryptd_crypt(struct work_struct *work) @@ -2031,10 +2077,28 @@ static void kcryptd_crypt(struct work_struct *work) kcryptd_crypt_write_convert(io); } +static void kcryptd_crypt_tasklet(unsigned long work) +{ + kcryptd_crypt((struct work_struct *)work); +} + static void kcryptd_queue_crypt(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; + if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) || + (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) { + if (in_irq()) { + /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */ + tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); + tasklet_schedule(&io->tasklet); + return; + } + + kcryptd_crypt(&io->work); + return; + } + INIT_WORK(&io->work, kcryptd_crypt); queue_work(cc->crypt_queue, &io->work); } @@ -2294,7 +2358,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string key = request_key(type, key_desc + 1, NULL); if (IS_ERR(key)) { - kzfree(new_key_string); + kfree_sensitive(new_key_string); return PTR_ERR(key); } @@ -2304,7 +2368,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string if (ret < 0) { up_read(&key->sem); key_put(key); - kzfree(new_key_string); + kfree_sensitive(new_key_string); return ret; } @@ -2318,10 +2382,10 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string if (!ret) { set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - kzfree(cc->key_string); + kfree_sensitive(cc->key_string); cc->key_string = new_key_string; } else - kzfree(new_key_string); + kfree_sensitive(new_key_string); return ret; } @@ -2382,7 +2446,7 @@ static int crypt_set_key(struct crypt_config *cc, char *key) clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); /* wipe references to any kernel keyring key */ - kzfree(cc->key_string); + kfree_sensitive(cc->key_string); cc->key_string = NULL; /* Decode key from its hex representation. */ @@ -2414,7 +2478,7 @@ static int crypt_wipe_key(struct crypt_config *cc) return r; } - kzfree(cc->key_string); + kfree_sensitive(cc->key_string); cc->key_string = NULL; r = crypt_setkey(cc); memset(&cc->key, 0, cc->key_size * sizeof(u8)); @@ -2493,15 +2557,15 @@ static void crypt_dtr(struct dm_target *ti) if (cc->dev) dm_put_device(ti, cc->dev); - kzfree(cc->cipher_string); - kzfree(cc->key_string); - kzfree(cc->cipher_auth); - kzfree(cc->authenc_key); + kfree_sensitive(cc->cipher_string); + kfree_sensitive(cc->key_string); + kfree_sensitive(cc->cipher_auth); + kfree_sensitive(cc->authenc_key); mutex_destroy(&cc->bio_alloc_lock); /* Must zero key material before freeing */ - kzfree(cc); + kfree_sensitive(cc); spin_lock(&dm_crypt_clients_lock); WARN_ON(!dm_crypt_clients_n); @@ -2838,7 +2902,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar struct crypt_config *cc = ti->private; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 6, "Invalid number of feature args"}, + {0, 8, "Invalid number of feature args"}, }; unsigned int opt_params, val; const char *opt_string, *sval; @@ -2868,6 +2932,10 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar else if (!strcasecmp(opt_string, "submit_from_crypt_cpus")) set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + else if (!strcasecmp(opt_string, "no_read_workqueue")) + set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); + else if (!strcasecmp(opt_string, "no_write_workqueue")) + set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); else if (sscanf(opt_string, "integrity:%u:", &val) == 1) { if (val == 0 || val > MAX_TAG_SIZE) { ti->error = "Invalid integrity arguments"; @@ -2908,6 +2976,21 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar return 0; } +#ifdef CONFIG_BLK_DEV_ZONED + +static int crypt_report_zones(struct dm_target *ti, + struct dm_report_zones_args *args, unsigned int nr_zones) +{ + struct crypt_config *cc = ti->private; + sector_t sector = cc->start + dm_target_offset(ti, args->next_sector); + + args->start = cc->start; + return blkdev_report_zones(cc->dev->bdev, sector, nr_zones, + dm_report_zones_cb, args); +} + +#endif + /* * Construct an encryption mapping: * <cipher> [<key>|:<key_size>:<user|logon>:<key_description>] <iv_offset> <dev_path> <start> @@ -3041,6 +3124,16 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; + /* + * For zoned block devices, we need to preserve the issuer write + * ordering. To do so, disable write workqueues and force inline + * encryption completion. + */ + if (bdev_is_zoned(cc->dev->bdev)) { + set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); + set_bit(DM_CRYPT_WRITE_INLINE, &cc->flags); + } + if (crypt_integrity_aead(cc) || cc->integrity_iv_size) { ret = crypt_integrity_ctr(cc, ti); if (ret) @@ -3196,6 +3289,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); + num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT); num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); if (cc->on_disk_tag_size) @@ -3208,6 +3303,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" same_cpu_crypt"); if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) DMEMIT(" submit_from_crypt_cpus"); + if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) + DMEMIT(" no_read_workqueue"); + if (test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) + DMEMIT(" no_write_workqueue"); if (cc->on_disk_tag_size) DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth); if (cc->sector_size != (1 << SECTOR_SHIFT)) @@ -3320,10 +3419,14 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 21, 0}, + .version = {1, 22, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, +#ifdef CONFIG_BLK_DEV_ZONED + .features = DM_TARGET_ZONED_HM, + .report_zones = crypt_report_zones, +#endif .map = crypt_map, .status = crypt_status, .postsuspend = crypt_postsuspend, diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c index ff03b90072c5..072ea913cebc 100644 --- a/drivers/md/dm-dust.c +++ b/drivers/md/dm-dust.c @@ -138,20 +138,22 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block, return 0; } -static int dust_query_block(struct dust_device *dd, unsigned long long block) +static int dust_query_block(struct dust_device *dd, unsigned long long block, char *result, + unsigned int maxlen, unsigned int *sz_ptr) { struct badblock *bblock; unsigned long flags; + unsigned int sz = *sz_ptr; spin_lock_irqsave(&dd->dust_lock, flags); bblock = dust_rb_search(&dd->badblocklist, block); if (bblock != NULL) - DMINFO("%s: block %llu found in badblocklist", __func__, block); + DMEMIT("%s: block %llu found in badblocklist", __func__, block); else - DMINFO("%s: block %llu not found in badblocklist", __func__, block); + DMEMIT("%s: block %llu not found in badblocklist", __func__, block); spin_unlock_irqrestore(&dd->dust_lock, flags); - return 0; + return 1; } static int __dust_map_read(struct dust_device *dd, sector_t thisblock) @@ -259,11 +261,13 @@ static bool __dust_clear_badblocks(struct rb_root *tree, return true; } -static int dust_clear_badblocks(struct dust_device *dd) +static int dust_clear_badblocks(struct dust_device *dd, char *result, unsigned int maxlen, + unsigned int *sz_ptr) { unsigned long flags; struct rb_root badblocklist; unsigned long long badblock_count; + unsigned int sz = *sz_ptr; spin_lock_irqsave(&dd->dust_lock, flags); badblocklist = dd->badblocklist; @@ -273,11 +277,36 @@ static int dust_clear_badblocks(struct dust_device *dd) spin_unlock_irqrestore(&dd->dust_lock, flags); if (!__dust_clear_badblocks(&badblocklist, badblock_count)) - DMINFO("%s: no badblocks found", __func__); + DMEMIT("%s: no badblocks found", __func__); else - DMINFO("%s: badblocks cleared", __func__); + DMEMIT("%s: badblocks cleared", __func__); - return 0; + return 1; +} + +static int dust_list_badblocks(struct dust_device *dd, char *result, unsigned int maxlen, + unsigned int *sz_ptr) +{ + unsigned long flags; + struct rb_root badblocklist; + struct rb_node *node; + struct badblock *bblk; + unsigned int sz = *sz_ptr; + unsigned long long num = 0; + + spin_lock_irqsave(&dd->dust_lock, flags); + badblocklist = dd->badblocklist; + for (node = rb_first(&badblocklist); node; node = rb_next(node)) { + bblk = rb_entry(node, struct badblock, node); + DMEMIT("%llu\n", bblk->bb); + num++; + } + + spin_unlock_irqrestore(&dd->dust_lock, flags); + if (!num) + DMEMIT("No blocks in badblocklist"); + + return 1; } /* @@ -383,7 +412,7 @@ static void dust_dtr(struct dm_target *ti) } static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, - char *result_buf, unsigned int maxlen) + char *result, unsigned int maxlen) { struct dust_device *dd = ti->private; sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT; @@ -393,6 +422,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, unsigned char wr_fail_cnt; unsigned int tmp_ui; unsigned long flags; + unsigned int sz = 0; char dummy; if (argc == 1) { @@ -410,18 +440,20 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, r = 0; } else if (!strcasecmp(argv[0], "countbadblocks")) { spin_lock_irqsave(&dd->dust_lock, flags); - DMINFO("countbadblocks: %llu badblock(s) found", + DMEMIT("countbadblocks: %llu badblock(s) found", dd->badblock_count); spin_unlock_irqrestore(&dd->dust_lock, flags); - r = 0; + r = 1; } else if (!strcasecmp(argv[0], "clearbadblocks")) { - r = dust_clear_badblocks(dd); + r = dust_clear_badblocks(dd, result, maxlen, &sz); } else if (!strcasecmp(argv[0], "quiet")) { if (!dd->quiet_mode) dd->quiet_mode = true; else dd->quiet_mode = false; r = 0; + } else if (!strcasecmp(argv[0], "listbadblocks")) { + r = dust_list_badblocks(dd, result, maxlen, &sz); } else { invalid_msg = true; } @@ -441,7 +473,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, else if (!strcasecmp(argv[0], "removebadblock")) r = dust_remove_block(dd, block); else if (!strcasecmp(argv[0], "queryblock")) - r = dust_query_block(dd, block); + r = dust_query_block(dd, block, result, maxlen, &sz); else invalid_msg = true; diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 44451276f128..cb85610527c2 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -363,7 +363,7 @@ static int ebs_map(struct dm_target *ti, struct bio *bio) bio_set_dev(bio, ec->dev->bdev); bio->bi_iter.bi_sector = ec->start + dm_target_offset(ti, bio->bi_iter.bi_sector); - if (unlikely(bio->bi_opf & REQ_OP_FLUSH)) + if (unlikely(bio_op(bio) == REQ_OP_FLUSH)) return DM_MAPIO_REMAPPED; /* * Only queue for bufio processing in case of partial or overlapping buffers diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index b869316d3722..b0c45c6ebe0b 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -36,7 +36,7 @@ struct dm_device { struct list_head list; }; -const char * const dm_allowed_targets[] __initconst = { +static const char * const dm_allowed_targets[] __initconst = { "crypt", "delay", "linear", diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 5da3eb661e50..8c8d940e532e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3405,8 +3405,8 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int static void free_alg(struct alg_spec *a) { - kzfree(a->alg_string); - kzfree(a->key); + kfree_sensitive(a->alg_string); + kfree_sensitive(a->key); memset(a, 0, sizeof *a); } @@ -4337,7 +4337,7 @@ static void dm_integrity_dtr(struct dm_target *ti) for (i = 0; i < ic->journal_sections; i++) { struct skcipher_request *req = ic->sk_requests[i]; if (req) { - kzfree(req->iv); + kfree_sensitive(req->iv); skcipher_request_free(req); } } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 056d891a32a9..28122e850ea1 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1168,7 +1168,7 @@ static void retrieve_status(struct dm_table *table, spec->sector_start = ti->begin; spec->length = ti->len; strncpy(spec->target_type, ti->type->name, - sizeof(spec->target_type)); + sizeof(spec->target_type) - 1); outptr += sizeof(struct dm_target_spec); remaining = len - (outptr - outbuf); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 73bb23de6336..53645a6f474c 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -128,6 +128,20 @@ static void queue_if_no_path_timeout_work(struct timer_list *t); #define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */ #define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */ +static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m) +{ + bool r = test_bit(MPATHF_bit, &m->flags); + + if (r) { + unsigned long flags; + spin_lock_irqsave(&m->lock, flags); + r = test_bit(MPATHF_bit, &m->flags); + spin_unlock_irqrestore(&m->lock, flags); + } + + return r; +} + /*----------------------------------------------- * Allocation routines *-----------------------------------------------*/ @@ -335,6 +349,8 @@ static int pg_init_all_paths(struct multipath *m) static void __switch_pg(struct multipath *m, struct priority_group *pg) { + lockdep_assert_held(&m->lock); + m->current_pg = pg; /* Must we initialise the PG first, and queue I/O till it's ready? */ @@ -382,7 +398,9 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) unsigned bypassed = 1; if (!atomic_read(&m->nr_valid_paths)) { + spin_lock_irqsave(&m->lock, flags); clear_bit(MPATHF_QUEUE_IO, &m->flags); + spin_unlock_irqrestore(&m->lock, flags); goto failed; } @@ -422,8 +440,11 @@ check_current_pg: continue; pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) { - if (!bypassed) + if (!bypassed) { + spin_lock_irqsave(&m->lock, flags); set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags); + spin_unlock_irqrestore(&m->lock, flags); + } return pgpath; } } @@ -465,7 +486,14 @@ static bool __must_push_back(struct multipath *m) static bool must_push_back_rq(struct multipath *m) { - return test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m); + unsigned long flags; + bool ret; + + spin_lock_irqsave(&m->lock, flags); + ret = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m)); + spin_unlock_irqrestore(&m->lock, flags); + + return ret; } /* @@ -485,7 +513,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); - if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) + if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) pgpath = choose_pgpath(m, nr_bytes); if (!pgpath) { @@ -493,8 +521,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, return DM_MAPIO_DELAY_REQUEUE; dm_report_EIO(m); /* Failed */ return DM_MAPIO_KILL; - } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || - test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { + } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) || + mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) { pg_init_all_paths(m); return DM_MAPIO_DELAY_REQUEUE; } @@ -560,33 +588,45 @@ static void multipath_release_clone(struct request *clone, * Map cloned bios (bio-based multipath) */ +static void __multipath_queue_bio(struct multipath *m, struct bio *bio) +{ + /* Queue for the daemon to resubmit */ + bio_list_add(&m->queued_bios, bio); + if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) + queue_work(kmultipathd, &m->process_queued_bios); +} + +static void multipath_queue_bio(struct multipath *m, struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&m->lock, flags); + __multipath_queue_bio(m, bio); + spin_unlock_irqrestore(&m->lock, flags); +} + static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) { struct pgpath *pgpath; unsigned long flags; - bool queue_io; /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); - if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) + if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) pgpath = choose_pgpath(m, bio->bi_iter.bi_size); - /* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */ - queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); - - if ((pgpath && queue_io) || - (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { - /* Queue for the daemon to resubmit */ + if (!pgpath) { spin_lock_irqsave(&m->lock, flags); - bio_list_add(&m->queued_bios, bio); + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + __multipath_queue_bio(m, bio); + pgpath = ERR_PTR(-EAGAIN); + } spin_unlock_irqrestore(&m->lock, flags); - /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */ - if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) - pg_init_all_paths(m); - else if (!queue_io) - queue_work(kmultipathd, &m->process_queued_bios); - + } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) || + mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) { + multipath_queue_bio(m, bio); + pg_init_all_paths(m); return ERR_PTR(-EAGAIN); } @@ -835,7 +875,7 @@ static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, struct request_queue *q = bdev_get_queue(bdev); int r; - if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) { + if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) { retain: if (*attached_handler_name) { /* @@ -1614,7 +1654,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, if (pgpath) fail_path(pgpath); - if (atomic_read(&m->nr_valid_paths) == 0 && + if (!atomic_read(&m->nr_valid_paths) && !must_push_back_rq(m)) { if (error == BLK_STS_IOERR) dm_report_EIO(m); @@ -1649,23 +1689,22 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, if (pgpath) fail_path(pgpath); - if (atomic_read(&m->nr_valid_paths) == 0 && - !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (__must_push_back(m)) { - r = DM_ENDIO_REQUEUE; - } else { - dm_report_EIO(m); - *error = BLK_STS_IOERR; + if (!atomic_read(&m->nr_valid_paths)) { + spin_lock_irqsave(&m->lock, flags); + if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + if (__must_push_back(m)) { + r = DM_ENDIO_REQUEUE; + } else { + dm_report_EIO(m); + *error = BLK_STS_IOERR; + } + spin_unlock_irqrestore(&m->lock, flags); + goto done; } - goto done; + spin_unlock_irqrestore(&m->lock, flags); } - spin_lock_irqsave(&m->lock, flags); - bio_list_add(&m->queued_bios, clone); - spin_unlock_irqrestore(&m->lock, flags); - if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) - queue_work(kmultipathd, &m->process_queued_bios); - + multipath_queue_bio(m, clone); r = DM_ENDIO_INCOMPLETE; done: if (pgpath) { @@ -1937,16 +1976,17 @@ static int multipath_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct multipath *m = ti->private; - struct pgpath *current_pgpath; + struct pgpath *pgpath; + unsigned long flags; int r; - current_pgpath = READ_ONCE(m->current_pgpath); - if (!current_pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) - current_pgpath = choose_pgpath(m, 0); + pgpath = READ_ONCE(m->current_pgpath); + if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) + pgpath = choose_pgpath(m, 0); - if (current_pgpath) { - if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) { - *bdev = current_pgpath->path.dev->bdev; + if (pgpath) { + if (!mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) { + *bdev = pgpath->path.dev->bdev; r = 0; } else { /* pg_init has not started or completed */ @@ -1954,10 +1994,11 @@ static int multipath_prepare_ioctl(struct dm_target *ti, } } else { /* No path is available */ + r = -EIO; + spin_lock_irqsave(&m->lock, flags); if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) r = -ENOTCONN; - else - r = -EIO; + spin_unlock_irqrestore(&m->lock, flags); } if (r == -ENOTCONN) { @@ -1965,8 +2006,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti, /* Path status changed, redo selection */ (void) choose_pgpath(m, 0); } + spin_lock_irqsave(&m->lock, flags); if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) - pg_init_all_paths(m); + (void) __pg_init_all_paths(m); + spin_unlock_irqrestore(&m->lock, flags); dm_table_run_md_queue_async(m->ti->table); process_queued_io_list(m); } @@ -2026,8 +2069,15 @@ static int multipath_busy(struct dm_target *ti) return true; /* no paths available, for blk-mq: rely on IO mapping to delay requeue */ - if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - return (m->queue_mode != DM_TYPE_REQUEST_BASED); + if (!atomic_read(&m->nr_valid_paths)) { + unsigned long flags; + spin_lock_irqsave(&m->lock, flags); + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + spin_unlock_irqrestore(&m->lock, flags); + return (m->queue_mode != DM_TYPE_REQUEST_BASED); + } + spin_unlock_irqrestore(&m->lock, flags); + } /* Guess which priority_group will be used at next mapping time */ pg = READ_ONCE(m->current_pg); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d9e270957e18..8d2b835d7a10 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2337,8 +2337,6 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) if (new_devs == rs->raid_disks || !rebuilds) { /* Replace a broken device */ - if (new_devs == 1 && !rs->delta_disks) - ; if (new_devs == rs->raid_disks) { DMINFO("Superblocks created for new raid set"); set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 7ce387a1cc6a..6d743ff6a314 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -70,9 +70,6 @@ void dm_start_queue(struct request_queue *q) void dm_stop_queue(struct request_queue *q) { - if (blk_mq_queue_stopped(q)) - return; - blk_mq_quiesce_queue(q); } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 75fa4d9b7617..f74982dcbea0 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -30,6 +30,7 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" +#define DM_VERITY_OPT_PANIC "panic_on_corruption" #define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" #define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once" @@ -254,6 +255,9 @@ out: if (v->mode == DM_VERITY_MODE_RESTART) kernel_restart("dm-verity device corrupted"); + if (v->mode == DM_VERITY_MODE_PANIC) + panic("dm-verity device corrupted"); + return 1; } @@ -742,6 +746,9 @@ static void verity_status(struct dm_target *ti, status_type_t type, case DM_VERITY_MODE_RESTART: DMEMIT(DM_VERITY_OPT_RESTART); break; + case DM_VERITY_MODE_PANIC: + DMEMIT(DM_VERITY_OPT_PANIC); + break; default: BUG(); } @@ -907,6 +914,10 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, v->mode = DM_VERITY_MODE_RESTART; continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_PANIC)) { + v->mode = DM_VERITY_MODE_PANIC; + continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) { r = verity_alloc_zero_digest(v); if (r) { @@ -1221,7 +1232,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 6, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity-verify-sig.h b/drivers/md/dm-verity-verify-sig.h index 19b1547aa741..3987c7141f79 100644 --- a/drivers/md/dm-verity-verify-sig.h +++ b/drivers/md/dm-verity-verify-sig.h @@ -34,25 +34,25 @@ void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts); #define DM_VERITY_ROOT_HASH_VERIFICATION_OPTS 0 -int verity_verify_root_hash(const void *data, size_t data_len, - const void *sig_data, size_t sig_len) +static inline int verity_verify_root_hash(const void *data, size_t data_len, + const void *sig_data, size_t sig_len) { return 0; } -bool verity_verify_is_sig_opt_arg(const char *arg_name) +static inline bool verity_verify_is_sig_opt_arg(const char *arg_name) { return false; } -int verity_verify_sig_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, - struct dm_verity_sig_opts *sig_opts, - unsigned int *argc, const char *arg_name) +static inline int verity_verify_sig_parse_opt_args(struct dm_arg_set *as, + struct dm_verity *v, struct dm_verity_sig_opts *sig_opts, + unsigned int *argc, const char *arg_name) { return -EINVAL; } -void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts) +static inline void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts) { } diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 641b9e3a399b..4e769d13473a 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -20,7 +20,8 @@ enum verity_mode { DM_VERITY_MODE_EIO, DM_VERITY_MODE_LOGGING, - DM_VERITY_MODE_RESTART + DM_VERITY_MODE_RESTART, + DM_VERITY_MODE_PANIC }; enum verity_block_type { diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index cfea054863a4..86dbe0c8b45c 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -538,7 +538,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { if (WC_MODE_PMEM(wc)) - wmb(); + pmem_wmb(); else ssd_commit_flushed(wc, wait_for_ios); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 87cf45f619fd..32fa6499739f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -504,7 +504,8 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector, } args.tgt = tgt; - ret = tgt->type->report_zones(tgt, &args, nr_zones); + ret = tgt->type->report_zones(tgt, &args, + nr_zones - args.zone_idx); if (ret < 0) goto out; } while (args.zone_idx < nr_zones && diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c new file mode 100644 index 000000000000..6bbec89976a7 --- /dev/null +++ b/drivers/md/md-autodetect.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#include <linux/mount.h> +#include <linux/major.h> +#include <linux/delay.h> +#include <linux/init_syscalls.h> +#include <linux/raid/detect.h> +#include <linux/raid/md_u.h> +#include <linux/raid/md_p.h> +#include "md.h" + +/* + * When md (and any require personalities) are compiled into the kernel + * (not a module), arrays can be assembles are boot time using with AUTODETECT + * where specially marked partitions are registered with md_autodetect_dev(), + * and with MD_BOOT where devices to be collected are given on the boot line + * with md=..... + * The code for that is here. + */ + +#ifdef CONFIG_MD_AUTODETECT +static int __initdata raid_noautodetect; +#else +static int __initdata raid_noautodetect=1; +#endif +static int __initdata raid_autopart; + +static struct md_setup_args { + int minor; + int partitioned; + int level; + int chunk; + char *device_names; +} md_setup_args[256] __initdata; + +static int md_setup_ents __initdata; + +/* + * Parse the command-line parameters given our kernel, but do not + * actually try to invoke the MD device now; that is handled by + * md_setup_drive after the low-level disk drivers have initialised. + * + * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which + * assigns the task of parsing integer arguments to the + * invoked program now). Added ability to initialise all + * the MD devices (by specifying multiple "md=" lines) + * instead of just one. -- KTK + * 18May2000: Added support for persistent-superblock arrays: + * md=n,0,factor,fault,device-list uses RAID0 for device n + * md=n,-1,factor,fault,device-list uses LINEAR for device n + * md=n,device-list reads a RAID superblock from the devices + * elements in device-list are read by name_to_kdev_t so can be + * a hex number or something like /dev/hda1 /dev/sdb + * 2001-06-03: Dave Cinege <dcinege@psychosis.com> + * Shifted name_to_kdev_t() and related operations to md_set_drive() + * for later execution. Rewrote section to make devfs compatible. + */ +static int __init md_setup(char *str) +{ + int minor, level, factor, fault, partitioned = 0; + char *pername = ""; + char *str1; + int ent; + + if (*str == 'd') { + partitioned = 1; + str++; + } + if (get_option(&str, &minor) != 2) { /* MD Number */ + printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); + return 0; + } + str1 = str; + for (ent=0 ; ent< md_setup_ents ; ent++) + if (md_setup_args[ent].minor == minor && + md_setup_args[ent].partitioned == partitioned) { + printk(KERN_WARNING "md: md=%s%d, Specified more than once. " + "Replacing previous definition.\n", partitioned?"d":"", minor); + break; + } + if (ent >= ARRAY_SIZE(md_setup_args)) { + printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned?"d":"", minor); + return 0; + } + if (ent >= md_setup_ents) + md_setup_ents++; + switch (get_option(&str, &level)) { /* RAID level */ + case 2: /* could be 0 or -1.. */ + if (level == 0 || level == LEVEL_LINEAR) { + if (get_option(&str, &factor) != 2 || /* Chunk Size */ + get_option(&str, &fault) != 2) { + printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); + return 0; + } + md_setup_args[ent].level = level; + md_setup_args[ent].chunk = 1 << (factor+12); + if (level == LEVEL_LINEAR) + pername = "linear"; + else + pername = "raid0"; + break; + } + /* FALL THROUGH */ + case 1: /* the first device is numeric */ + str = str1; + /* FALL THROUGH */ + case 0: + md_setup_args[ent].level = LEVEL_NONE; + pername="super-block"; + } + + printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n", + minor, pername, str); + md_setup_args[ent].device_names = str; + md_setup_args[ent].partitioned = partitioned; + md_setup_args[ent].minor = minor; + + return 1; +} + +static void __init md_setup_drive(struct md_setup_args *args) +{ + char *devname = args->device_names; + dev_t devices[MD_SB_DISKS + 1], mdev; + struct mdu_array_info_s ainfo = { }; + struct block_device *bdev; + struct mddev *mddev; + int err = 0, i; + char name[16]; + + if (args->partitioned) { + mdev = MKDEV(mdp_major, args->minor << MdpMinorShift); + sprintf(name, "md_d%d", args->minor); + } else { + mdev = MKDEV(MD_MAJOR, args->minor); + sprintf(name, "md%d", args->minor); + } + + for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { + struct kstat stat; + char *p; + char comp_name[64]; + dev_t dev; + + p = strchr(devname, ','); + if (p) + *p++ = 0; + + dev = name_to_dev_t(devname); + if (strncmp(devname, "/dev/", 5) == 0) + devname += 5; + snprintf(comp_name, 63, "/dev/%s", devname); + if (init_stat(comp_name, &stat, 0) == 0 && S_ISBLK(stat.mode)) + dev = new_decode_dev(stat.rdev); + if (!dev) { + pr_warn("md: Unknown device name: %s\n", devname); + break; + } + + devices[i] = dev; + devname = p; + } + devices[i] = 0; + + if (!i) + return; + + pr_info("md: Loading %s: %s\n", name, args->device_names); + + bdev = blkdev_get_by_dev(mdev, FMODE_READ, NULL); + if (IS_ERR(bdev)) { + pr_err("md: open failed - cannot start array %s\n", name); + return; + } + + err = -EIO; + if (WARN(bdev->bd_disk->fops != &md_fops, + "Opening block device %x resulted in non-md device\n", + mdev)) + goto out_blkdev_put; + + mddev = bdev->bd_disk->private_data; + + err = mddev_lock(mddev); + if (err) { + pr_err("md: failed to lock array %s\n", name); + goto out_blkdev_put; + } + + if (!list_empty(&mddev->disks) || mddev->raid_disks) { + pr_warn("md: Ignoring %s, already autodetected. (Use raid=noautodetect)\n", + name); + goto out_unlock; + } + + if (args->level != LEVEL_NONE) { + /* non-persistent */ + ainfo.level = args->level; + ainfo.md_minor = args->minor; + ainfo.not_persistent = 1; + ainfo.state = (1 << MD_SB_CLEAN); + ainfo.chunk_size = args->chunk; + while (devices[ainfo.raid_disks]) + ainfo.raid_disks++; + } + + err = md_set_array_info(mddev, &ainfo); + + for (i = 0; i <= MD_SB_DISKS && devices[i]; i++) { + struct mdu_disk_info_s dinfo = { + .major = MAJOR(devices[i]), + .minor = MINOR(devices[i]), + }; + + if (args->level != LEVEL_NONE) { + dinfo.number = i; + dinfo.raid_disk = i; + dinfo.state = + (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC); + } + + md_add_new_disk(mddev, &dinfo); + } + + if (!err) + err = do_md_run(mddev); + if (err) + pr_warn("md: starting %s failed\n", name); +out_unlock: + mddev_unlock(mddev); +out_blkdev_put: + blkdev_put(bdev, FMODE_READ); +} + +static int __init raid_setup(char *str) +{ + int len, pos; + + len = strlen(str) + 1; + pos = 0; + + while (pos < len) { + char *comma = strchr(str+pos, ','); + int wlen; + if (comma) + wlen = (comma-str)-pos; + else wlen = (len-1)-pos; + + if (!strncmp(str, "noautodetect", wlen)) + raid_noautodetect = 1; + if (!strncmp(str, "autodetect", wlen)) + raid_noautodetect = 0; + if (strncmp(str, "partitionable", wlen)==0) + raid_autopart = 1; + if (strncmp(str, "part", wlen)==0) + raid_autopart = 1; + pos += wlen+1; + } + return 1; +} + +__setup("raid=", raid_setup); +__setup("md=", md_setup); + +static void __init autodetect_raid(void) +{ + /* + * Since we don't want to detect and use half a raid array, we need to + * wait for the known devices to complete their probing + */ + printk(KERN_INFO "md: Waiting for all devices to be available before autodetect\n"); + printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n"); + + wait_for_device_probe(); + md_autostart_arrays(raid_autopart); +} + +void __init md_run_setup(void) +{ + int ent; + + if (raid_noautodetect) + printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n"); + else + autodetect_raid(); + + for (ent = 0; ent < md_setup_ents; ent++) + md_setup_drive(&md_setup_args[ent]); +} diff --git a/drivers/md/md.c b/drivers/md/md.c index 153bc766bc5a..15bbdc1630ed 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -68,10 +68,6 @@ #include "md-bitmap.h" #include "md-cluster.h" -#ifndef MODULE -static void autostart_arrays(int part); -#endif - /* pers_list is a list of registered personalities protected * by pers_lock. * pers_lock does extra service to protect accesses to @@ -332,8 +328,6 @@ static struct ctl_table raid_root_table[] = { { } }; -static const struct block_device_operations md_fops; - static int start_readonly; /* @@ -4426,7 +4420,6 @@ array_state_show(struct mddev *mddev, char *page) static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev); static int md_set_readonly(struct mddev *mddev, struct block_device *bdev); -static int do_md_run(struct mddev *mddev); static int restart_array(struct mddev *mddev); static ssize_t @@ -6089,7 +6082,7 @@ abort: } EXPORT_SYMBOL_GPL(md_run); -static int do_md_run(struct mddev *mddev) +int do_md_run(struct mddev *mddev) { int err; @@ -6724,7 +6717,7 @@ static int get_disk_info(struct mddev *mddev, void __user * arg) return 0; } -static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) +int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) { char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; struct md_rdev *rdev; @@ -6770,7 +6763,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) } /* - * add_new_disk can be used once the array is assembled + * md_add_new_disk can be used once the array is assembled * to add "hot spares". They must already have a superblock * written */ @@ -6883,7 +6876,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) return err; } - /* otherwise, add_new_disk is only allowed + /* otherwise, md_add_new_disk is only allowed * for major_version==0 superblocks */ if (mddev->major_version != 0) { @@ -7128,7 +7121,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd) } /* - * set_array_info is used two different ways + * md_set_array_info is used two different ways * The original usage is when creating a new array. * In this usage, raid_disks is > 0 and it together with * level, size, not_persistent,layout,chunksize determine the @@ -7140,9 +7133,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd) * The minor and patch _version numbers are also kept incase the * super_block handler wishes to interpret them. */ -static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) +int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info) { - if (info->raid_disks == 0) { /* just setting version number for superblock loading */ if (info->major_version < 0 || @@ -7492,7 +7484,6 @@ static inline bool md_ioctl_valid(unsigned int cmd) case GET_DISK_INFO: case HOT_ADD_DISK: case HOT_REMOVE_DISK: - case RAID_AUTORUN: case RAID_VERSION: case RESTART_ARRAY_RW: case RUN_ARRAY: @@ -7538,13 +7529,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, case RAID_VERSION: err = get_version(argp); goto out; - -#ifndef MODULE - case RAID_AUTORUN: - err = 0; - autostart_arrays(arg); - goto out; -#endif default:; } @@ -7643,7 +7627,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto unlock; } - err = set_array_info(mddev, &info); + err = md_set_array_info(mddev, &info); if (err) { pr_warn("md: couldn't set array info. %d\n", err); goto unlock; @@ -7697,7 +7681,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, /* Need to clear read-only for this */ break; else - err = add_new_disk(mddev, &info); + err = md_add_new_disk(mddev, &info); goto unlock; } break; @@ -7765,7 +7749,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, if (copy_from_user(&info, argp, sizeof(info))) err = -EFAULT; else - err = add_new_disk(mddev, &info); + err = md_add_new_disk(mddev, &info); goto unlock; } @@ -7888,7 +7872,7 @@ static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing) return ret; } -static const struct block_device_operations md_fops = +const struct block_device_operations md_fops = { .owner = THIS_MODULE, .submit_bio = md_submit_bio, @@ -9791,7 +9775,7 @@ void md_autodetect_dev(dev_t dev) } } -static void autostart_arrays(int part) +void md_autostart_arrays(int part) { struct md_rdev *rdev; struct detected_devices_node *node_detected_dev; diff --git a/drivers/md/md.h b/drivers/md/md.h index f79b5b4101ef..d9c4e6b7e939 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -803,4 +803,16 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio !bio->bi_disk->queue->limits.max_write_zeroes_sectors) mddev->queue->limits.max_write_zeroes_sectors = 0; } + +struct mdu_array_info_s; +struct mdu_disk_info_s; + +extern int mdp_major; +void md_autostart_arrays(int part); +int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info); +int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info); +int do_md_run(struct mddev *mddev); + +extern const struct block_device_operations md_fops; + #endif /* _MD_MD_H */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index fb8d1fb14088..ef0fd4830803 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7019,7 +7019,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) } else goto abort; spin_lock_init(&conf->device_lock); - seqcount_init(&conf->gen_lock); + seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock); mutex_init(&conf->cache_size_mutex); init_waitqueue_head(&conf->wait_for_quiescent); init_waitqueue_head(&conf->wait_for_stripe); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 7fb3b26a181a..16fc29472f5c 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -582,7 +582,7 @@ struct r5conf { int prev_chunk_sectors; int prev_algo; short generation; /* increments with every reshape */ - seqcount_t gen_lock; /* lock against generation changes */ + seqcount_spinlock_t gen_lock; /* lock against generation changes */ unsigned long reshape_checkpoint; /* Time we last updated * metadata */ long long min_offset_diff; /* minimum difference between |