Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig             |  54
-rw-r--r--  drivers/md/bcache/Makefile     |   2
-rw-r--r--  drivers/md/bcache/alloc.c      |   5
-rw-r--r--  drivers/md/bcache/bcache.h     |   4
-rw-r--r--  drivers/md/bcache/bset.c       |  17
-rw-r--r--  drivers/md/bcache/btree.c      |  19
-rw-r--r--  drivers/md/bcache/closure.c    |   7
-rw-r--r--  drivers/md/bcache/request.c    |  12
-rw-r--r--  drivers/md/bcache/super.c      |  56
-rw-r--r--  drivers/md/bcache/sysfs.c      |   7
-rw-r--r--  drivers/md/bcache/writeback.c  |   4
-rw-r--r--  drivers/md/dm-bio-prison-v1.c  |  27
-rw-r--r--  drivers/md/dm-bio-prison-v2.c  |  26
-rw-r--r--  drivers/md/dm-cache-target.c   |  77
-rw-r--r--  drivers/md/dm-clone-metadata.c |  29
-rw-r--r--  drivers/md/dm-clone-metadata.h |   4
-rw-r--r--  drivers/md/dm-clone-target.c   |  62
-rw-r--r--  drivers/md/dm-crypt.c          |   9
-rw-r--r--  drivers/md/dm-dust.c           |  97
-rw-r--r--  drivers/md/dm-flakey.c         |  25
-rw-r--r--  drivers/md/dm-integrity.c      |  28
-rw-r--r--  drivers/md/dm-linear.c         |  22
-rw-r--r--  drivers/md/dm-raid.c           | 164
-rw-r--r--  drivers/md/dm-stripe.c         |  15
-rw-r--r--  drivers/md/dm-table.c          |  27
-rw-r--r--  drivers/md/dm-thin.c           | 118
-rw-r--r--  drivers/md/dm-writecache.c     |   5
-rw-r--r--  drivers/md/dm-zoned-metadata.c | 166
-rw-r--r--  drivers/md/dm-zoned-reclaim.c  |   8
-rw-r--r--  drivers/md/dm-zoned-target.c   |  54
-rw-r--r--  drivers/md/dm-zoned.h          |   2
-rw-r--r--  drivers/md/dm.c                | 135
-rw-r--r--  drivers/md/md-bitmap.c         |   2
-rw-r--r--  drivers/md/md-linear.c         |   5
-rw-r--r--  drivers/md/md-multipath.c      |   5
-rw-r--r--  drivers/md/md.c                |  57
-rw-r--r--  drivers/md/md.h                |   4
-rw-r--r--  drivers/md/raid0.c             |   7
-rw-r--r--  drivers/md/raid1.c             |   6
-rw-r--r--  drivers/md/raid10.c            |   7
-rw-r--r--  drivers/md/raid5-ppl.c         |   2
-rw-r--r--  drivers/md/raid5.c             |   8
42 files changed, 697 insertions, 693 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index aa98953f4462..d6d5ab23c088 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -38,9 +38,9 @@ config MD_AUTODETECT
default y
---help---
If you say Y here, then the kernel will try to autodetect raid
- arrays as part of its boot process.
+ arrays as part of its boot process.
- If you don't use raid and say Y, this autodetection can cause
+ If you don't use raid and say Y, this autodetection can cause
a several-second delay in the boot time due to various
synchronisation steps that are part of this step.
@@ -290,7 +290,7 @@ config DM_SNAPSHOT
depends on BLK_DEV_DM
select DM_BUFIO
---help---
- Allow volume managers to take writable snapshots of a device.
+ Allow volume managers to take writable snapshots of a device.
config DM_THIN_PROVISIONING
tristate "Thin provisioning target"
@@ -298,7 +298,7 @@ config DM_THIN_PROVISIONING
select DM_PERSISTENT_DATA
select DM_BIO_PRISON
---help---
- Provides thin provisioning and snapshots that share a data store.
+ Provides thin provisioning and snapshots that share a data store.
config DM_CACHE
tristate "Cache target (EXPERIMENTAL)"
@@ -307,23 +307,23 @@ config DM_CACHE
select DM_PERSISTENT_DATA
select DM_BIO_PRISON
---help---
- dm-cache attempts to improve performance of a block device by
- moving frequently used data to a smaller, higher performance
- device. Different 'policy' plugins can be used to change the
- algorithms used to select which blocks are promoted, demoted,
- cleaned etc. It supports writeback and writethrough modes.
+ dm-cache attempts to improve performance of a block device by
+ moving frequently used data to a smaller, higher performance
+ device. Different 'policy' plugins can be used to change the
+ algorithms used to select which blocks are promoted, demoted,
+ cleaned etc. It supports writeback and writethrough modes.
config DM_CACHE_SMQ
tristate "Stochastic MQ Cache Policy (EXPERIMENTAL)"
depends on DM_CACHE
default y
---help---
- A cache policy that uses a multiqueue ordered by recent hits
- to select which blocks should be promoted and demoted.
- This is meant to be a general purpose policy. It prioritises
- reads over writes. This SMQ policy (vs MQ) offers the promise
- of less memory utilization, improved performance and increased
- adaptability in the face of changing workloads.
+ A cache policy that uses a multiqueue ordered by recent hits
+ to select which blocks should be promoted and demoted.
+ This is meant to be a general purpose policy. It prioritises
+ reads over writes. This SMQ policy (vs MQ) offers the promise
+ of less memory utilization, improved performance and increased
+ adaptability in the face of changing workloads.
config DM_WRITECACHE
tristate "Writecache target"
@@ -343,9 +343,9 @@ config DM_ERA
select DM_PERSISTENT_DATA
select DM_BIO_PRISON
---help---
- dm-era tracks which parts of a block device are written to
- over time. Useful for maintaining cache coherency when using
- vendor snapshots.
+ dm-era tracks which parts of a block device are written to
+ over time. Useful for maintaining cache coherency when using
+ vendor snapshots.
config DM_CLONE
tristate "Clone target (EXPERIMENTAL)"
@@ -353,20 +353,20 @@ config DM_CLONE
default n
select DM_PERSISTENT_DATA
---help---
- dm-clone produces a one-to-one copy of an existing, read-only source
- device into a writable destination device. The cloned device is
- visible/mountable immediately and the copy of the source device to the
- destination device happens in the background, in parallel with user
- I/O.
+ dm-clone produces a one-to-one copy of an existing, read-only source
+ device into a writable destination device. The cloned device is
+ visible/mountable immediately and the copy of the source device to the
+ destination device happens in the background, in parallel with user
+ I/O.
- If unsure, say N.
+ If unsure, say N.
config DM_MIRROR
tristate "Mirror target"
depends on BLK_DEV_DM
---help---
- Allow volume managers to mirror logical volumes, also
- needed for live data migration tools such as 'pvmove'.
+ Allow volume managers to mirror logical volumes, also
+ needed for live data migration tools such as 'pvmove'.
config DM_LOG_USERSPACE
tristate "Mirror userspace logging"
@@ -483,7 +483,7 @@ config DM_FLAKEY
tristate "Flakey target"
depends on BLK_DEV_DM
---help---
- A target that intermittently fails I/O for debugging purposes.
+ A target that intermittently fails I/O for debugging purposes.
config DM_VERITY
tristate "Verity target support"
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index d26b35195825..fd714628da6a 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -5,5 +5,3 @@ obj-$(CONFIG_BCACHE) += bcache.o
bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\
io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
util.o writeback.o
-
-CFLAGS_request.o += -Iblock
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 6f776823b9ba..a1df0d95151c 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -377,7 +377,10 @@ retry_invalidate:
if (!fifo_full(&ca->free_inc))
goto retry_invalidate;
- bch_prio_write(ca);
+ if (bch_prio_write(ca, false) < 0) {
+ ca->invalidate_needs_gc = 1;
+ wake_up_gc(ca->set);
+ }
}
}
out:
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 013e35a9e317..9198c1b480d9 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -582,6 +582,7 @@ struct cache_set {
*/
wait_queue_head_t btree_cache_wait;
struct task_struct *btree_cache_alloc_lock;
+ spinlock_t btree_cannibalize_lock;
/*
* When we free a btree node, we increment the gen of the bucket the
@@ -723,6 +724,7 @@ struct cache_set {
unsigned int gc_always_rewrite:1;
unsigned int shrinker_disabled:1;
unsigned int copy_gc_enabled:1;
+ unsigned int idle_max_writeback_rate_enabled:1;
#define BUCKET_HASH_BITS 12
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
@@ -977,7 +979,7 @@ bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
-void bch_prio_write(struct cache *ca);
+int bch_prio_write(struct cache *ca, bool wait);
void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
extern struct workqueue_struct *bcache_wq;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 08768796b543..cffcdc9feefb 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -155,6 +155,7 @@ int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
return 0;
}
+/* Pop the top key of keylist by pointing l->top to its previous key */
struct bkey *bch_keylist_pop(struct keylist *l)
{
struct bkey *k = l->keys;
@@ -168,6 +169,7 @@ struct bkey *bch_keylist_pop(struct keylist *l)
return l->top = k;
}
+/* Pop the bottom key of keylist and update l->top_p */
void bch_keylist_pop_front(struct keylist *l)
{
l->top_p -= bkey_u64s(l->keys);
@@ -309,7 +311,6 @@ void bch_btree_keys_free(struct btree_keys *b)
t->tree = NULL;
t->data = NULL;
}
-EXPORT_SYMBOL(bch_btree_keys_free);
int bch_btree_keys_alloc(struct btree_keys *b,
unsigned int page_order,
@@ -342,7 +343,6 @@ err:
bch_btree_keys_free(b);
return -ENOMEM;
}
-EXPORT_SYMBOL(bch_btree_keys_alloc);
void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
bool *expensive_debug_checks)
@@ -361,7 +361,6 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
* any more.
*/
}
-EXPORT_SYMBOL(bch_btree_keys_init);
/* Binary tree stuff for auxiliary search trees */
@@ -678,7 +677,6 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
bch_bset_build_unwritten_tree(b);
}
-EXPORT_SYMBOL(bch_bset_init_next);
/*
* Build auxiliary binary tree 'struct bset_tree *t', this tree is used to
@@ -732,7 +730,6 @@ void bch_bset_build_written_tree(struct btree_keys *b)
j = inorder_next(j, t->size))
make_bfloat(t, j);
}
-EXPORT_SYMBOL(bch_bset_build_written_tree);
/* Insert */
@@ -780,7 +777,6 @@ fix_right: do {
j = j * 2 + 1;
} while (j < t->size);
}
-EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
static void bch_bset_fix_lookup_table(struct btree_keys *b,
struct bset_tree *t,
@@ -855,7 +851,6 @@ bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
return b->ops->key_merge(b, l, r);
}
-EXPORT_SYMBOL(bch_bkey_try_merge);
void bch_bset_insert(struct btree_keys *b, struct bkey *where,
struct bkey *insert)
@@ -875,7 +870,6 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where,
bkey_copy(where, insert);
bch_bset_fix_lookup_table(b, t, where);
}
-EXPORT_SYMBOL(bch_bset_insert);
unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
struct bkey *replace_key)
@@ -931,7 +925,6 @@ copy: bkey_copy(m, k);
merged:
return status;
}
-EXPORT_SYMBOL(bch_btree_insert_key);
/* Lookup */
@@ -1077,7 +1070,6 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
return i.l;
}
-EXPORT_SYMBOL(__bch_bset_search);
/* Btree iterator */
@@ -1132,7 +1124,6 @@ struct bkey *bch_btree_iter_init(struct btree_keys *b,
{
return __bch_btree_iter_init(b, iter, search, b->set);
}
-EXPORT_SYMBOL(bch_btree_iter_init);
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
btree_iter_cmp_fn *cmp)
@@ -1165,7 +1156,6 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter)
return __bch_btree_iter_next(iter, btree_iter_cmp);
}
-EXPORT_SYMBOL(bch_btree_iter_next);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
struct btree_keys *b, ptr_filter_fn fn)
@@ -1196,7 +1186,6 @@ int bch_bset_sort_state_init(struct bset_sort_state *state,
return mempool_init_page_pool(&state->pool, 1, page_order);
}
-EXPORT_SYMBOL(bch_bset_sort_state_init);
static void btree_mergesort(struct btree_keys *b, struct bset *out,
struct btree_iter *iter,
@@ -1313,7 +1302,6 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
}
-EXPORT_SYMBOL(bch_btree_sort_partial);
void bch_btree_sort_and_fix_extents(struct btree_keys *b,
struct btree_iter *iter,
@@ -1366,7 +1354,6 @@ void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
out:
bch_bset_build_written_tree(b);
}
-EXPORT_SYMBOL(bch_btree_sort_lazy);
void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
{
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index ba434d9ac720..14d6c33b0957 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -543,6 +543,11 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
set_btree_node_dirty(b);
+ /*
+ * w->journal is always the oldest journal pin of all bkeys
+ * in the leaf node, to make sure the oldest jset seq won't
+ * be increased before this btree node is flushed.
+ */
if (journal_ref) {
if (w->journal &&
journal_pin_cmp(b->c, w->journal, journal_ref)) {
@@ -723,6 +728,8 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
* IO can always make forward progress:
*/
nr /= c->btree_pages;
+ if (nr == 0)
+ nr = 1;
nr = min_t(unsigned long, nr, mca_can_free(c));
i = 0;
@@ -884,15 +891,17 @@ out:
static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
{
- struct task_struct *old;
-
- old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
- if (old && old != current) {
+ spin_lock(&c->btree_cannibalize_lock);
+ if (likely(c->btree_cache_alloc_lock == NULL)) {
+ c->btree_cache_alloc_lock = current;
+ } else if (c->btree_cache_alloc_lock != current) {
if (op)
prepare_to_wait(&c->btree_cache_wait, &op->wait,
TASK_UNINTERRUPTIBLE);
+ spin_unlock(&c->btree_cannibalize_lock);
return -EINTR;
}
+ spin_unlock(&c->btree_cannibalize_lock);
return 0;
}
@@ -927,10 +936,12 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
*/
static void bch_cannibalize_unlock(struct cache_set *c)
{
+ spin_lock(&c->btree_cannibalize_lock);
if (c->btree_cache_alloc_lock == current) {
c->btree_cache_alloc_lock = NULL;
wake_up(&c->btree_cache_wait);
}
+ spin_unlock(&c->btree_cannibalize_lock);
}
static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
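The btree.c hunks above replace the bare cmpxchg() on btree_cache_alloc_lock with a check done under the new btree_cannibalize_lock spinlock, so that registering as a waiter and inspecting the owner happen atomically with respect to the owner's unlock and wake_up(). Below is a minimal userspace analog using pthreads, not the bcache code itself; the owner check and the decision to wait are made under one lock, and where the kernel version registers on btree_cache_wait and returns -EINTR so the caller can retry, this sketch simply blocks on a condition variable.

#include <pthread.h>
#include <stdio.h>

struct cannibalize_lock {
	pthread_mutex_t lock;   /* plays the role of btree_cannibalize_lock */
	pthread_cond_t  wait;   /* plays the role of btree_cache_wait */
	pthread_t       owner;  /* plays the role of btree_cache_alloc_lock */
	int             owned;
};

/* Become the owner, or wait under the same lock that the unlocker takes. */
static void cannibalize_lock_acquire(struct cannibalize_lock *l)
{
	pthread_mutex_lock(&l->lock);
	while (l->owned && !pthread_equal(l->owner, pthread_self()))
		pthread_cond_wait(&l->wait, &l->lock);
	l->owner = pthread_self();
	l->owned = 1;
	pthread_mutex_unlock(&l->lock);
}

static void cannibalize_lock_release(struct cannibalize_lock *l)
{
	pthread_mutex_lock(&l->lock);
	if (l->owned && pthread_equal(l->owner, pthread_self())) {
		l->owned = 0;
		pthread_cond_broadcast(&l->wait);  /* like wake_up(&c->btree_cache_wait) */
	}
	pthread_mutex_unlock(&l->lock);
}

int main(void)
{
	struct cannibalize_lock l = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.wait = PTHREAD_COND_INITIALIZER,
	};

	cannibalize_lock_acquire(&l);
	printf("acquired\n");
	cannibalize_lock_release(&l);
	printf("released\n");
	return 0;
}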
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index c12cd809ab19..0164a1fe94a9 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -45,7 +45,6 @@ void closure_sub(struct closure *cl, int v)
{
closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
}
-EXPORT_SYMBOL(closure_sub);
/*
* closure_put - decrement a closure's refcount
@@ -54,7 +53,6 @@ void closure_put(struct closure *cl)
{
closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
}
-EXPORT_SYMBOL(closure_put);
/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
@@ -76,7 +74,6 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
closure_sub(cl, CLOSURE_WAITING + 1);
}
}
-EXPORT_SYMBOL(__closure_wake_up);
/**
* closure_wait - add a closure to a waitlist
@@ -96,7 +93,6 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
return true;
}
-EXPORT_SYMBOL(closure_wait);
struct closure_syncer {
struct task_struct *task;
@@ -131,7 +127,6 @@ void __sched __closure_sync(struct closure *cl)
__set_current_state(TASK_RUNNING);
}
-EXPORT_SYMBOL(__closure_sync);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
@@ -149,7 +144,6 @@ void closure_debug_create(struct closure *cl)
list_add(&cl->all, &closure_list);
spin_unlock_irqrestore(&closure_list_lock, flags);
}
-EXPORT_SYMBOL(closure_debug_create);
void closure_debug_destroy(struct closure *cl)
{
@@ -162,7 +156,6 @@ void closure_debug_destroy(struct closure *cl)
list_del(&cl->all);
spin_unlock_irqrestore(&closure_list_lock, flags);
}
-EXPORT_SYMBOL(closure_debug_destroy);
static struct dentry *closure_debug;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 41adcd1546f1..73478a91a342 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -62,18 +62,6 @@ static void bch_data_insert_keys(struct closure *cl)
struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
int ret;
- /*
- * If we're looping, might already be waiting on
- * another journal write - can't wait on more than one journal write at
- * a time
- *
- * XXX: this looks wrong
- */
-#if 0
- while (atomic_read(&s->cl.remaining) & CLOSURE_WAITING)
- closure_sync(&s->cl);
-#endif
-
if (!op->replace)
journal_ref = bch_journal(op->c, &op->insert_keys,
op->flush_journal ? cl : NULL);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 20ed838e9413..77e9869345e7 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -92,10 +92,11 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
sb->version, sb->flags, sb->seq, sb->keys);
- err = "Not a bcache superblock";
+ err = "Not a bcache superblock (bad offset)";
if (sb->offset != SB_SECTOR)
goto err;
+ err = "Not a bcache superblock (bad magic)";
if (memcmp(sb->magic, bcache_magic, 16))
goto err;
@@ -529,12 +530,29 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
closure_sync(cl);
}
-void bch_prio_write(struct cache *ca)
+int bch_prio_write(struct cache *ca, bool wait)
{
int i;
struct bucket *b;
struct closure cl;
+ pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
+ fifo_used(&ca->free[RESERVE_PRIO]),
+ fifo_used(&ca->free[RESERVE_NONE]),
+ fifo_used(&ca->free_inc));
+
+ /*
+ * Pre-check if there are enough free buckets. In the non-blocking
+ * scenario it's better to fail early rather than starting to allocate
+ * buckets and do a cleanup later in case of failure.
+ */
+ if (!wait) {
+ size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) +
+ fifo_used(&ca->free[RESERVE_NONE]);
+ if (prio_buckets(ca) > avail)
+ return -ENOMEM;
+ }
+
closure_init_stack(&cl);
lockdep_assert_held(&ca->set->bucket_lock);
@@ -544,9 +562,6 @@ void bch_prio_write(struct cache *ca)
atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
&ca->meta_sectors_written);
- //pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
- // fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-
for (i = prio_buckets(ca) - 1; i >= 0; --i) {
long bucket;
struct prio_set *p = ca->disk_buckets;
@@ -564,7 +579,7 @@ void bch_prio_write(struct cache *ca)
p->magic = pset_magic(&ca->sb);
p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8);
- bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
+ bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
BUG_ON(bucket == -1);
mutex_unlock(&ca->set->bucket_lock);
@@ -593,6 +608,7 @@ void bch_prio_write(struct cache *ca)
ca->prio_last_buckets[i] = ca->prio_buckets[i];
}
+ return 0;
}
static void prio_read(struct cache *ca, uint64_t bucket)
@@ -761,20 +777,28 @@ static inline int idx_to_first_minor(int idx)
static void bcache_device_free(struct bcache_device *d)
{
+ struct gendisk *disk = d->disk;
+
lockdep_assert_held(&bch_register_lock);
- pr_info("%s stopped", d->disk->disk_name);
+ if (disk)
+ pr_info("%s stopped", disk->disk_name);
+ else
+ pr_err("bcache device (NULL gendisk) stopped");
if (d->c)
bcache_device_detach(d);
- if (d->disk && d->disk->flags & GENHD_FL_UP)
- del_gendisk(d->disk);
- if (d->disk && d->disk->queue)
- blk_cleanup_queue(d->disk->queue);
- if (d->disk) {
+
+ if (disk) {
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+
ida_simple_remove(&bcache_device_idx,
- first_minor_to_idx(d->disk->first_minor));
- put_disk(d->disk);
+ first_minor_to_idx(disk->first_minor));
+ put_disk(disk);
}
bioset_exit(&d->bio_split);
@@ -1769,6 +1793,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->btree_cache_wait);
+ spin_lock_init(&c->btree_cannibalize_lock);
init_waitqueue_head(&c->bucket_wait);
init_waitqueue_head(&c->gc_wait);
sema_init(&c->uuid_write_mutex, 1);
@@ -1809,6 +1834,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
+ c->idle_max_writeback_rate_enabled = 1;
WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
return c;
@@ -1954,7 +1980,7 @@ static int run_cache_set(struct cache_set *c)
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i)
- bch_prio_write(ca);
+ bch_prio_write(ca, true);
mutex_unlock(&c->bucket_lock);
err = "cannot allocate new UUID bucket";
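The alloc.c and super.c hunks above change bch_prio_write() to take a wait flag and return an error: in the non-blocking case it pre-checks the free bucket counts and fails with -ENOMEM before allocating anything, and the allocator thread reacts by flagging invalidate_needs_gc and waking garbage collection. A standalone sketch of that calling convention, not the bcache code, with hypothetical names:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_cache {
	unsigned free_prio;    /* stands in for fifo_used(&ca->free[RESERVE_PRIO]) */
	unsigned free_none;    /* stands in for fifo_used(&ca->free[RESERVE_NONE]) */
	unsigned prio_buckets; /* stands in for prio_buckets(ca) */
	bool gc_requested;
};

static int prio_write(struct fake_cache *ca, bool wait)
{
	/* Fail early in the non-blocking case, before anything is allocated. */
	if (!wait && ca->prio_buckets > ca->free_prio + ca->free_none)
		return -ENOMEM;

	/* ... allocate prio buckets and write them out ... */
	return 0;
}

static void allocator_step(struct fake_cache *ca)
{
	if (prio_write(ca, false) < 0) {
		ca->gc_requested = true;  /* invalidate_needs_gc = 1; wake_up_gc() */
		printf("no free buckets, waking gc\n");
	}
}

int main(void)
{
	struct fake_cache ca = { .free_prio = 1, .free_none = 2, .prio_buckets = 8 };

	allocator_step(&ca);  /* falls back to gc */
	ca.free_none = 16;
	allocator_step(&ca);  /* now succeeds */
	return 0;
}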
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 627dcea0f5b6..733e2ddf3c78 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -134,6 +134,7 @@ rw_attribute(expensive_debug_checks);
rw_attribute(cache_replacement_policy);
rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
+rw_attribute(idle_max_writeback_rate);
rw_attribute(gc_after_writeback);
rw_attribute(size);
@@ -747,6 +748,8 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
+ sysfs_printf(idle_max_writeback_rate, "%i",
+ c->idle_max_writeback_rate_enabled);
sysfs_printf(gc_after_writeback, "%i", c->gc_after_writeback);
sysfs_printf(io_disable, "%i",
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -864,6 +867,9 @@ STORE(__bch_cache_set)
sysfs_strtoul_bool(gc_always_rewrite, c->gc_always_rewrite);
sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled);
sysfs_strtoul_bool(copy_gc_enabled, c->copy_gc_enabled);
+ sysfs_strtoul_bool(idle_max_writeback_rate,
+ c->idle_max_writeback_rate_enabled);
+
/*
* write gc_after_writeback here may overwrite an already set
* BCH_DO_AUTO_GC, it doesn't matter because this flag will be
@@ -954,6 +960,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
+ &sysfs_idle_max_writeback_rate,
&sysfs_gc_after_writeback,
&sysfs_io_disable,
&sysfs_cutoff_writeback,
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index d60268fe49e1..4a40f9eadeaf 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -122,6 +122,10 @@ static void __update_writeback_rate(struct cached_dev *dc)
static bool set_at_max_writeback_rate(struct cache_set *c,
struct cached_dev *dc)
{
+ /* Don't set max writeback rate if it is disabled */

+ if (!c->idle_max_writeback_rate_enabled)
+ return false;
+
/* Don't set max writeback rate if gc is running */
if (!c->gc_mark_valid)
return false;
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index b5389890bbc3..1f8f98efd97a 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -150,11 +150,10 @@ static int bio_detain(struct dm_bio_prison *prison,
struct dm_bio_prison_cell **cell_result)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -198,11 +197,9 @@ void dm_cell_release(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *bios)
{
- unsigned long flags;
-
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
__cell_release(prison, cell, bios);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
@@ -250,12 +247,10 @@ void dm_cell_visit_release(struct dm_bio_prison *prison,
void *context,
struct dm_bio_prison_cell *cell)
{
- unsigned long flags;
-
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
visit_fn(context, cell);
rb_erase(&cell->node, &prison->cells);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
}
EXPORT_SYMBOL_GPL(dm_cell_visit_release);
@@ -275,11 +270,10 @@ int dm_cell_promote_or_release(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __promote_or_release(prison, cell);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -379,10 +373,9 @@ EXPORT_SYMBOL_GPL(dm_deferred_entry_dec);
int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
{
int r = 1;
- unsigned long flags;
unsigned next_entry;
- spin_lock_irqsave(&ds->lock, flags);
+ spin_lock_irq(&ds->lock);
if ((ds->sweeper == ds->current_entry) &&
!ds->entries[ds->current_entry].count)
r = 0;
@@ -392,7 +385,7 @@ int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
if (!ds->entries[next_entry].count)
ds->current_entry = next_entry;
}
- spin_unlock_irqrestore(&ds->lock, flags);
+ spin_unlock_irq(&ds->lock);
return r;
}
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
index b092cdc8e1ae..8ee019eda32d 100644
--- a/drivers/md/dm-bio-prison-v2.c
+++ b/drivers/md/dm-bio-prison-v2.c
@@ -177,11 +177,10 @@ bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
struct dm_bio_prison_cell_v2 **cell_result)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -261,11 +260,10 @@ int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
struct dm_bio_prison_cell_v2 **cell_result)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __lock(prison, key, lock_level, cell_prealloc, cell_result);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -285,11 +283,9 @@ void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
struct dm_bio_prison_cell_v2 *cell,
struct work_struct *continuation)
{
- unsigned long flags;
-
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
__quiesce(prison, cell, continuation);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
}
EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2);
@@ -309,11 +305,10 @@ int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
unsigned new_lock_level)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __promote(prison, cell, new_lock_level);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -342,11 +337,10 @@ bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
struct bio_list *bios)
{
bool r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __unlock(prison, cell, bios);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 8346e6d1816c..2d32821b3a5b 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -74,22 +74,19 @@ static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
bool r;
- unsigned long flags;
- spin_lock_irqsave(&iot->lock, flags);
+ spin_lock_irq(&iot->lock);
r = __iot_idle_for(iot, jifs);
- spin_unlock_irqrestore(&iot->lock, flags);
+ spin_unlock_irq(&iot->lock);
return r;
}
static void iot_io_begin(struct io_tracker *iot, sector_t len)
{
- unsigned long flags;
-
- spin_lock_irqsave(&iot->lock, flags);
+ spin_lock_irq(&iot->lock);
iot->in_flight += len;
- spin_unlock_irqrestore(&iot->lock, flags);
+ spin_unlock_irq(&iot->lock);
}
static void __iot_io_end(struct io_tracker *iot, sector_t len)
@@ -172,7 +169,6 @@ static void __commit(struct work_struct *_ws)
{
struct batcher *b = container_of(_ws, struct batcher, commit_work);
blk_status_t r;
- unsigned long flags;
struct list_head work_items;
struct work_struct *ws, *tmp;
struct continuation *k;
@@ -186,12 +182,12 @@ static void __commit(struct work_struct *_ws)
* We have to grab these before the commit_op to avoid a race
* condition.
*/
- spin_lock_irqsave(&b->lock, flags);
+ spin_lock_irq(&b->lock);
list_splice_init(&b->work_items, &work_items);
bio_list_merge(&bios, &b->bios);
bio_list_init(&b->bios);
b->commit_scheduled = false;
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_unlock_irq(&b->lock);
r = b->commit_op(b->commit_context);
@@ -238,13 +234,12 @@ static void async_commit(struct batcher *b)
static void continue_after_commit(struct batcher *b, struct continuation *k)
{
- unsigned long flags;
bool commit_scheduled;
- spin_lock_irqsave(&b->lock, flags);
+ spin_lock_irq(&b->lock);
commit_scheduled = b->commit_scheduled;
list_add_tail(&k->ws.entry, &b->work_items);
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_unlock_irq(&b->lock);
if (commit_scheduled)
async_commit(b);
@@ -255,13 +250,12 @@ static void continue_after_commit(struct batcher *b, struct continuation *k)
*/
static void issue_after_commit(struct batcher *b, struct bio *bio)
{
- unsigned long flags;
bool commit_scheduled;
- spin_lock_irqsave(&b->lock, flags);
+ spin_lock_irq(&b->lock);
commit_scheduled = b->commit_scheduled;
bio_list_add(&b->bios, bio);
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_unlock_irq(&b->lock);
if (commit_scheduled)
async_commit(b);
@@ -273,12 +267,11 @@ static void issue_after_commit(struct batcher *b, struct bio *bio)
static void schedule_commit(struct batcher *b)
{
bool immediate;
- unsigned long flags;
- spin_lock_irqsave(&b->lock, flags);
+ spin_lock_irq(&b->lock);
immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
b->commit_scheduled = true;
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_unlock_irq(&b->lock);
if (immediate)
async_commit(b);
@@ -630,23 +623,19 @@ static struct per_bio_data *init_per_bio_data(struct bio *bio)
static void defer_bio(struct cache *cache, struct bio *bio)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
bio_list_add(&cache->deferred_bios, bio);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
wake_deferred_bio_worker(cache);
}
static void defer_bios(struct cache *cache, struct bio_list *bios)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
bio_list_merge(&cache->deferred_bios, bios);
bio_list_init(bios);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
wake_deferred_bio_worker(cache);
}
@@ -756,33 +745,27 @@ static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
static void set_discard(struct cache *cache, dm_dblock_t b)
{
- unsigned long flags;
-
BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
atomic_inc(&cache->stats.discard_count);
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
set_bit(from_dblock(b), cache->discard_bitset);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
}
static void clear_discard(struct cache *cache, dm_dblock_t b)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
clear_bit(from_dblock(b), cache->discard_bitset);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
}
static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
int r;
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
r = test_bit(from_dblock(b), cache->discard_bitset);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
return r;
}
@@ -790,12 +773,10 @@ static bool is_discarded(struct cache *cache, dm_dblock_t b)
static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
int r;
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
cache->discard_bitset);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
return r;
}
@@ -827,17 +808,16 @@ static void remap_to_cache(struct cache *cache, struct bio *bio,
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
- unsigned long flags;
struct per_bio_data *pb;
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
bio_op(bio) != REQ_OP_DISCARD) {
pb = get_per_bio_data(bio);
pb->tick = true;
cache->need_tick_bio = false;
}
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
}
static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
@@ -1889,17 +1869,16 @@ static void process_deferred_bios(struct work_struct *ws)
{
struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
- unsigned long flags;
bool commit_needed = false;
struct bio_list bios;
struct bio *bio;
bio_list_init(&bios);
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
bio_list_merge(&bios, &cache->deferred_bios);
bio_list_init(&cache->deferred_bios);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
while ((bio = bio_list_pop(&bios))) {
if (bio->bi_opf & REQ_PREFLUSH)
diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c
index 6bc8c1d1c351..08c552e5e41b 100644
--- a/drivers/md/dm-clone-metadata.c
+++ b/drivers/md/dm-clone-metadata.c
@@ -712,7 +712,7 @@ static int __metadata_commit(struct dm_clone_metadata *cmd)
static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
{
int r;
- unsigned long word, flags;
+ unsigned long word;
word = 0;
do {
@@ -736,9 +736,9 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
return r;
/* Update the changed flag */
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
dmap->changed = 0;
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
return 0;
}
@@ -746,7 +746,6 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
{
int r = -EPERM;
- unsigned long flags;
struct dirty_map *dmap, *next_dmap;
down_write(&cmd->lock);
@@ -770,9 +769,9 @@ int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
}
/* Swap dirty bitmaps */
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
cmd->current_dmap = next_dmap;
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
/*
* No one is accessing the old dirty bitmap anymore, so we can flush
@@ -817,9 +816,9 @@ int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
{
int r = 0;
struct dirty_map *dmap;
- unsigned long word, region_nr, flags;
+ unsigned long word, region_nr;
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
if (cmd->read_only) {
r = -EPERM;
@@ -836,7 +835,7 @@ int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
}
}
out:
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
return r;
}
@@ -903,13 +902,11 @@ out:
void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
{
- unsigned long flags;
-
down_write(&cmd->lock);
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
cmd->read_only = 1;
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
if (!cmd->fail_io)
dm_bm_set_read_only(cmd->bm);
@@ -919,13 +916,11 @@ void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
{
- unsigned long flags;
-
down_write(&cmd->lock);
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
cmd->read_only = 0;
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
if (!cmd->fail_io)
dm_bm_set_read_write(cmd->bm);
diff --git a/drivers/md/dm-clone-metadata.h b/drivers/md/dm-clone-metadata.h
index 434bff08508b..3fe50a781c11 100644
--- a/drivers/md/dm-clone-metadata.h
+++ b/drivers/md/dm-clone-metadata.h
@@ -44,7 +44,9 @@ int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long re
* @start: Starting region number
* @nr_regions: Number of regions in the range
*
- * This function doesn't block, so it's safe to call it from interrupt context.
+ * This function doesn't block, but since it uses spin_lock_irq()/spin_unlock_irq()
+ * it's NOT safe to call it from any context where interrupts are disabled, e.g.,
+ * from interrupt context.
*/
int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
unsigned long nr_regions);
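The updated comment above spells out why switching dm_clone_cond_set_range() to spin_lock_irq()/spin_unlock_irq() forbids callers with interrupts already disabled: unlock_irq unconditionally re-enables interrupts, whereas unlock_irqrestore puts back whatever state was saved. A toy userspace model of that difference, not kernel code, with the interrupt state reduced to one boolean:

#include <stdbool.h>
#include <stdio.h>

static bool irqs_enabled = true;

static void lock_irq(void)            { irqs_enabled = false; }
static void unlock_irq(void)          { irqs_enabled = true; }
static void lock_irqsave(bool *flags) { *flags = irqs_enabled; irqs_enabled = false; }
static void unlock_irqrestore(bool f) { irqs_enabled = f; }

int main(void)
{
	bool flags;

	irqs_enabled = false;  /* pretend the caller already disabled interrupts */

	lock_irqsave(&flags);
	unlock_irqrestore(flags);
	printf("irqsave/irqrestore: irqs %s\n",
	       irqs_enabled ? "enabled (wrong here)" : "still disabled (correct)");

	lock_irq();
	unlock_irq();
	printf("irq/unlock_irq:     irqs %s\n",
	       irqs_enabled ? "enabled (a bug in this context)" : "still disabled");

	return 0;
}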
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 4ca8f1977222..b3d89072d21c 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -332,8 +332,6 @@ static void submit_bios(struct bio_list *bios)
*/
static void issue_bio(struct clone *clone, struct bio *bio)
{
- unsigned long flags;
-
if (!bio_triggers_commit(clone, bio)) {
generic_make_request(bio);
return;
@@ -352,9 +350,9 @@ static void issue_bio(struct clone *clone, struct bio *bio)
* Batch together any bios that trigger commits and then issue a single
* commit for them in process_deferred_flush_bios().
*/
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_add(&clone->deferred_flush_bios, bio);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
wake_worker(clone);
}
@@ -469,7 +467,7 @@ static void complete_discard_bio(struct clone *clone, struct bio *bio, bool succ
static void process_discard_bio(struct clone *clone, struct bio *bio)
{
- unsigned long rs, re, flags;
+ unsigned long rs, re;
bio_region_range(clone, bio, &rs, &re);
BUG_ON(re > clone->nr_regions);
@@ -501,9 +499,9 @@ static void process_discard_bio(struct clone *clone, struct bio *bio)
/*
* Defer discard processing.
*/
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_add(&clone->deferred_discard_bios, bio);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
wake_worker(clone);
}
@@ -554,6 +552,12 @@ struct hash_table_bucket {
#define bucket_unlock_irqrestore(bucket, flags) \
spin_unlock_irqrestore(&(bucket)->lock, flags)
+#define bucket_lock_irq(bucket) \
+ spin_lock_irq(&(bucket)->lock)
+
+#define bucket_unlock_irq(bucket) \
+ spin_unlock_irq(&(bucket)->lock)
+
static int hash_table_init(struct clone *clone)
{
unsigned int i, sz;
@@ -851,7 +855,6 @@ static void hydration_overwrite(struct dm_clone_region_hydration *hd, struct bio
*/
static void hydrate_bio_region(struct clone *clone, struct bio *bio)
{
- unsigned long flags;
unsigned long region_nr;
struct hash_table_bucket *bucket;
struct dm_clone_region_hydration *hd, *hd2;
@@ -859,19 +862,19 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
region_nr = bio_to_region(clone, bio);
bucket = get_hash_table_bucket(clone, region_nr);
- bucket_lock_irqsave(bucket, flags);
+ bucket_lock_irq(bucket);
hd = __hash_find(bucket, region_nr);
if (hd) {
/* Someone else is hydrating the region */
bio_list_add(&hd->deferred_bios, bio);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
return;
}
if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
/* The region has been hydrated */
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
issue_bio(clone, bio);
return;
}
@@ -880,16 +883,16 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
* We must allocate a hydration descriptor and start the hydration of
* the corresponding region.
*/
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
hd = alloc_hydration(clone);
hydration_init(hd, region_nr);
- bucket_lock_irqsave(bucket, flags);
+ bucket_lock_irq(bucket);
/* Check if the region has been hydrated in the meantime. */
if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
free_hydration(hd);
issue_bio(clone, bio);
return;
@@ -899,7 +902,7 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
if (hd2 != hd) {
/* Someone else started the region's hydration. */
bio_list_add(&hd2->deferred_bios, bio);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
free_hydration(hd);
return;
}
@@ -911,7 +914,7 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
*/
if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
hlist_del(&hd->h);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
free_hydration(hd);
bio_io_error(bio);
return;
@@ -925,11 +928,11 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
* to the destination device.
*/
if (is_overwrite_bio(clone, bio)) {
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
hydration_overwrite(hd, bio);
} else {
bio_list_add(&hd->deferred_bios, bio);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
hydration_copy(hd, 1);
}
}
@@ -996,7 +999,6 @@ static unsigned long __start_next_hydration(struct clone *clone,
unsigned long offset,
struct batch_info *batch)
{
- unsigned long flags;
struct hash_table_bucket *bucket;
struct dm_clone_region_hydration *hd;
unsigned long nr_regions = clone->nr_regions;
@@ -1010,13 +1012,13 @@ static unsigned long __start_next_hydration(struct clone *clone,
break;
bucket = get_hash_table_bucket(clone, offset);
- bucket_lock_irqsave(bucket, flags);
+ bucket_lock_irq(bucket);
if (!dm_clone_is_region_hydrated(clone->cmd, offset) &&
!__hash_find(bucket, offset)) {
hydration_init(hd, offset);
__insert_region_hydration(bucket, hd);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
/* Batch hydration */
__batch_hydration(batch, hd);
@@ -1024,7 +1026,7 @@ static unsigned long __start_next_hydration(struct clone *clone,
return (offset + 1);
}
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
} while (++offset < nr_regions);
@@ -1140,13 +1142,13 @@ static void process_deferred_discards(struct clone *clone)
int r = -EPERM;
struct bio *bio;
struct blk_plug plug;
- unsigned long rs, re, flags;
+ unsigned long rs, re;
struct bio_list discards = BIO_EMPTY_LIST;
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_merge(&discards, &clone->deferred_discard_bios);
bio_list_init(&clone->deferred_discard_bios);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
if (bio_list_empty(&discards))
return;
@@ -1176,13 +1178,12 @@ out:
static void process_deferred_bios(struct clone *clone)
{
- unsigned long flags;
struct bio_list bios = BIO_EMPTY_LIST;
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_merge(&bios, &clone->deferred_bios);
bio_list_init(&clone->deferred_bios);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
if (bio_list_empty(&bios))
return;
@@ -1193,7 +1194,6 @@ static void process_deferred_bios(struct clone *clone)
static void process_deferred_flush_bios(struct clone *clone)
{
struct bio *bio;
- unsigned long flags;
struct bio_list bios = BIO_EMPTY_LIST;
struct bio_list bio_completions = BIO_EMPTY_LIST;
@@ -1201,13 +1201,13 @@ static void process_deferred_flush_bios(struct clone *clone)
* If there are any deferred flush bios, we must commit the metadata
* before issuing them or signaling their completion.
*/
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_merge(&bios, &clone->deferred_flush_bios);
bio_list_init(&clone->deferred_flush_bios);
bio_list_merge(&bio_completions, &clone->deferred_flush_completions);
bio_list_init(&clone->deferred_flush_completions);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
!(dm_clone_changed_this_transaction(clone->cmd) && need_commit_due_to_time(clone)))
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f87f6495652f..eb9782fc93fe 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2700,21 +2700,18 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ret = -ENOMEM;
- cc->io_queue = alloc_workqueue("kcryptd_io/%s",
- WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
- 1, devname);
+ cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname);
if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue";
goto bad;
}
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
- cc->crypt_queue = alloc_workqueue("kcryptd/%s",
- WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+ cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
1, devname);
else
cc->crypt_queue = alloc_workqueue("kcryptd/%s",
- WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
num_online_cpus(), devname);
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c
index 8288887b7f94..eb37584427a4 100644
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -17,6 +17,7 @@
struct badblock {
struct rb_node node;
sector_t bb;
+ unsigned char wr_fail_cnt;
};
struct dust_device {
@@ -101,7 +102,8 @@ static int dust_remove_block(struct dust_device *dd, unsigned long long block)
return 0;
}
-static int dust_add_block(struct dust_device *dd, unsigned long long block)
+static int dust_add_block(struct dust_device *dd, unsigned long long block,
+ unsigned char wr_fail_cnt)
{
struct badblock *bblock;
unsigned long flags;
@@ -115,6 +117,7 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block)
spin_lock_irqsave(&dd->dust_lock, flags);
bblock->bb = block;
+ bblock->wr_fail_cnt = wr_fail_cnt;
if (!dust_rb_insert(&dd->badblocklist, bblock)) {
if (!dd->quiet_mode) {
DMERR("%s: block %llu already in badblocklist",
@@ -126,8 +129,10 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block)
}
dd->badblock_count++;
- if (!dd->quiet_mode)
- DMINFO("%s: badblock added at block %llu", __func__, block);
+ if (!dd->quiet_mode) {
+ DMINFO("%s: badblock added at block %llu with write fail count %hhu",
+ __func__, block, wr_fail_cnt);
+ }
spin_unlock_irqrestore(&dd->dust_lock, flags);
return 0;
@@ -163,22 +168,27 @@ static int dust_map_read(struct dust_device *dd, sector_t thisblock,
bool fail_read_on_bb)
{
unsigned long flags;
- int ret = DM_MAPIO_REMAPPED;
+ int r = DM_MAPIO_REMAPPED;
if (fail_read_on_bb) {
thisblock >>= dd->sect_per_block_shift;
spin_lock_irqsave(&dd->dust_lock, flags);
- ret = __dust_map_read(dd, thisblock);
+ r = __dust_map_read(dd, thisblock);
spin_unlock_irqrestore(&dd->dust_lock, flags);
}
- return ret;
+ return r;
}
-static void __dust_map_write(struct dust_device *dd, sector_t thisblock)
+static int __dust_map_write(struct dust_device *dd, sector_t thisblock)
{
struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock);
+ if (bblk && bblk->wr_fail_cnt > 0) {
+ bblk->wr_fail_cnt--;
+ return DM_MAPIO_KILL;
+ }
+
if (bblk) {
rb_erase(&bblk->node, &dd->badblocklist);
dd->badblock_count--;
@@ -189,37 +199,40 @@ static void __dust_map_write(struct dust_device *dd, sector_t thisblock)
(unsigned long long)thisblock);
}
}
+
+ return DM_MAPIO_REMAPPED;
}
static int dust_map_write(struct dust_device *dd, sector_t thisblock,
bool fail_read_on_bb)
{
unsigned long flags;
+ int ret = DM_MAPIO_REMAPPED;
if (fail_read_on_bb) {
thisblock >>= dd->sect_per_block_shift;
spin_lock_irqsave(&dd->dust_lock, flags);
- __dust_map_write(dd, thisblock);
+ ret = __dust_map_write(dd, thisblock);
spin_unlock_irqrestore(&dd->dust_lock, flags);
}
- return DM_MAPIO_REMAPPED;
+ return ret;
}
static int dust_map(struct dm_target *ti, struct bio *bio)
{
struct dust_device *dd = ti->private;
- int ret;
+ int r;
bio_set_dev(bio, dd->dev->bdev);
bio->bi_iter.bi_sector = dd->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
if (bio_data_dir(bio) == READ)
- ret = dust_map_read(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
+ r = dust_map_read(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
else
- ret = dust_map_write(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
+ r = dust_map_write(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
- return ret;
+ return r;
}
static bool __dust_clear_badblocks(struct rb_root *tree,
@@ -375,8 +388,10 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
struct dust_device *dd = ti->private;
sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT;
bool invalid_msg = false;
- int result = -EINVAL;
+ int r = -EINVAL;
unsigned long long tmp, block;
+ unsigned char wr_fail_cnt;
+ unsigned int tmp_ui;
unsigned long flags;
char dummy;
@@ -388,45 +403,69 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
} else if (!strcasecmp(argv[0], "disable")) {
DMINFO("disabling read failures on bad sectors");
dd->fail_read_on_bb = false;
- result = 0;
+ r = 0;
} else if (!strcasecmp(argv[0], "enable")) {
DMINFO("enabling read failures on bad sectors");
dd->fail_read_on_bb = true;
- result = 0;
+ r = 0;
} else if (!strcasecmp(argv[0], "countbadblocks")) {
spin_lock_irqsave(&dd->dust_lock, flags);
DMINFO("countbadblocks: %llu badblock(s) found",
dd->badblock_count);
spin_unlock_irqrestore(&dd->dust_lock, flags);
- result = 0;
+ r = 0;
} else if (!strcasecmp(argv[0], "clearbadblocks")) {
- result = dust_clear_badblocks(dd);
+ r = dust_clear_badblocks(dd);
} else if (!strcasecmp(argv[0], "quiet")) {
if (!dd->quiet_mode)
dd->quiet_mode = true;
else
dd->quiet_mode = false;
- result = 0;
+ r = 0;
} else {
invalid_msg = true;
}
} else if (argc == 2) {
if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
- return result;
+ return r;
block = tmp;
sector_div(size, dd->sect_per_block);
if (block > size) {
DMERR("selected block value out of range");
- return result;
+ return r;
}
if (!strcasecmp(argv[0], "addbadblock"))
- result = dust_add_block(dd, block);
+ r = dust_add_block(dd, block, 0);
else if (!strcasecmp(argv[0], "removebadblock"))
- result = dust_remove_block(dd, block);
+ r = dust_remove_block(dd, block);
else if (!strcasecmp(argv[0], "queryblock"))
- result = dust_query_block(dd, block);
+ r = dust_query_block(dd, block);
+ else
+ invalid_msg = true;
+
+ } else if (argc == 3) {
+ if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
+ return r;
+
+ if (sscanf(argv[2], "%u%c", &tmp_ui, &dummy) != 1)
+ return r;
+
+ block = tmp;
+ if (tmp_ui > 255) {
+ DMERR("selected write fail count out of range");
+ return r;
+ }
+ wr_fail_cnt = tmp_ui;
+ sector_div(size, dd->sect_per_block);
+ if (block > size) {
+ DMERR("selected block value out of range");
+ return r;
+ }
+
+ if (!strcasecmp(argv[0], "addbadblock"))
+ r = dust_add_block(dd, block, wr_fail_cnt);
else
invalid_msg = true;
@@ -436,7 +475,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
if (invalid_msg)
DMERR("unrecognized message '%s' received", argv[0]);
- return result;
+ return r;
}
static void dust_status(struct dm_target *ti, status_type_t type,
@@ -499,12 +538,12 @@ static struct target_type dust_target = {
static int __init dm_dust_init(void)
{
- int result = dm_register_target(&dust_target);
+ int r = dm_register_target(&dust_target);
- if (result < 0)
- DMERR("dm_register_target failed %d", result);
+ if (r < 0)
+ DMERR("dm_register_target failed %d", r);
- return result;
+ return r;
}
static void __exit dm_dust_exit(void)
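The dm-dust hunks above add a per-badblock write fail count: "addbadblock <block> <count>" stores wr_fail_cnt, and __dust_map_write() returns DM_MAPIO_KILL and decrements the counter until it reaches zero, after which a write clears the block as before. A small userspace model of that countdown, not the dm-dust code, with a single block instead of the rb-tree:

#include <stdio.h>

#define MAPIO_REMAPPED 0
#define MAPIO_KILL     1

struct badblock {
	unsigned long long bb;
	unsigned char wr_fail_cnt;
	int present;
};

static int map_write(struct badblock *blk)
{
	if (blk->present && blk->wr_fail_cnt > 0) {
		blk->wr_fail_cnt--;      /* fail this write, keep the bad block */
		return MAPIO_KILL;
	}
	if (blk->present)
		blk->present = 0;        /* a write to a zero-count bad block clears it */
	return MAPIO_REMAPPED;
}

int main(void)
{
	/* e.g. "addbadblock 60 3": block 60 fails the next three writes */
	struct badblock blk = { .bb = 60, .wr_fail_cnt = 3, .present = 1 };

	for (int i = 0; i < 5; i++)
		printf("write %d -> %s\n", i,
		       map_write(&blk) == MAPIO_KILL ? "KILL" : "REMAPPED");
	return 0;
}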
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 2900fbde89b3..a2cc9e45cbba 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -280,7 +280,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
struct flakey_c *fc = ti->private;
bio_set_dev(bio, fc->dev->bdev);
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
bio->bi_iter.bi_sector =
flakey_map_sector(ti, bio->bi_iter.bi_sector);
}
@@ -322,8 +322,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
pb->bio_submitted = false;
- /* Do not fail reset zone */
- if (bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (op_is_zone_mgmt(bio_op(bio)))
goto map_bio;
/* Are we alive ? */
@@ -384,7 +383,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
struct flakey_c *fc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
- if (bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (op_is_zone_mgmt(bio_op(bio)))
return DM_ENDIO_DONE;
if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
@@ -460,21 +459,15 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
}
#ifdef CONFIG_BLK_DEV_ZONED
-static int flakey_report_zones(struct dm_target *ti, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+static int flakey_report_zones(struct dm_target *ti,
+ struct dm_report_zones_args *args, unsigned int nr_zones)
{
struct flakey_c *fc = ti->private;
- int ret;
+ sector_t sector = flakey_map_sector(ti, args->next_sector);
- /* Do report and remap it */
- ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
- zones, nr_zones);
- if (ret != 0)
- return ret;
-
- if (*nr_zones)
- dm_remap_zone_report(ti, fc->start, zones, nr_zones);
- return 0;
+ args->start = fc->start;
+ return blkdev_report_zones(fc->dev->bdev, sector, nr_zones,
+ dm_report_zones_cb, args);
}
#endif
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index dab4446fe7d8..b225b3e445fa 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -53,6 +53,7 @@
#define SB_VERSION_1 1
#define SB_VERSION_2 2
#define SB_VERSION_3 3
+#define SB_VERSION_4 4
#define SB_SECTORS 8
#define MAX_SECTORS_PER_BLOCK 8
@@ -73,6 +74,7 @@ struct superblock {
#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
#define SB_FLAG_RECALCULATING 0x2
#define SB_FLAG_DIRTY_BITMAP 0x4
+#define SB_FLAG_FIXED_PADDING 0x8
#define JOURNAL_ENTRY_ROUNDUP 8
@@ -250,6 +252,7 @@ struct dm_integrity_c {
bool journal_uptodate;
bool just_formatted;
bool recalculate_flag;
+ bool fix_padding;
struct alg_spec internal_hash_alg;
struct alg_spec journal_crypt_alg;
@@ -463,7 +466,9 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
static void sb_set_version(struct dm_integrity_c *ic)
{
- if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING))
+ ic->sb->version = SB_VERSION_4;
+ else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
ic->sb->version = SB_VERSION_3;
else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
ic->sb->version = SB_VERSION_2;
@@ -2955,6 +2960,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
arg_count += !!ic->internal_hash_alg.alg_string;
arg_count += !!ic->journal_crypt_alg.alg_string;
arg_count += !!ic->journal_mac_alg.alg_string;
+ arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0;
DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
ic->tag_size, ic->mode, arg_count);
if (ic->meta_dev)
@@ -2974,6 +2980,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" sectors_per_bit:%llu", (unsigned long long)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
}
+ if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0)
+ DMEMIT(" fix_padding");
#define EMIT_ALG(a, n) \
do { \
@@ -3042,8 +3050,14 @@ static int calculate_device_limits(struct dm_integrity_c *ic)
if (!ic->meta_dev) {
sector_t last_sector, last_area, last_offset;
- ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
- (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
+ /* we have to maintain excessive padding for compatibility with existing volumes */
+ __u64 metadata_run_padding =
+ ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ?
+ (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) :
+ (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS);
+
+ ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
+ metadata_run_padding) >> SECTOR_SHIFT;
if (!(ic->metadata_run & (ic->metadata_run - 1)))
ic->log2_metadata_run = __ffs(ic->metadata_run);
else
@@ -3086,6 +3100,8 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec
journal_sections = 1;
if (!ic->meta_dev) {
+ if (ic->fix_padding)
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING);
ic->sb->journal_sections = cpu_to_le32(journal_sections);
if (!interleave_sectors)
interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
@@ -3725,6 +3741,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
} else if (!strcmp(opt_string, "recalculate")) {
ic->recalculate_flag = true;
+ } else if (!strcmp(opt_string, "fix_padding")) {
+ ic->fix_padding = true;
} else {
r = -EINVAL;
ti->error = "Invalid argument";
@@ -3867,7 +3885,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
should_write_sb = true;
}
- if (!ic->sb->version || ic->sb->version > SB_VERSION_3) {
+ if (!ic->sb->version || ic->sb->version > SB_VERSION_4) {
r = -EINVAL;
ti->error = "Unknown version";
goto bad;
@@ -4182,7 +4200,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 3, 0},
+ .version = {1, 4, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
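
    The padding change above is easiest to see numerically: the legacy expression shifts 1 left by both SECTOR_SHIFT and METADATA_PADDING_SECTORS, while the fixed expression simply converts the padding sector count to bytes. A minimal userspace sketch of the arithmetic, assuming METADATA_PADDING_SECTORS is 8 as in the mainline driver and the usual 512-byte SECTOR_SHIFT of 9 (neither value appears in the hunks above):

    #include <stdio.h>

    #define SECTOR_SHIFT             9  /* assumed: 512-byte sectors */
    #define METADATA_PADDING_SECTORS 8  /* assumed from the mainline driver */

    int main(void)
    {
            /* legacy rounding unit: 1 << (9 + 8) = 131072 bytes (128 KiB) */
            unsigned long long legacy = 1ULL << SECTOR_SHIFT << METADATA_PADDING_SECTORS;
            /* fixed rounding unit: 8 sectors << 9 = 4096 bytes (4 KiB) */
            unsigned long long fixed = (unsigned long long)METADATA_PADDING_SECTORS << SECTOR_SHIFT;

            printf("legacy metadata_run rounding: %llu bytes\n", legacy);
            printf("fixed metadata_run rounding:  %llu bytes\n", fixed);
            return 0;
    }

    Superblocks without SB_FLAG_FIXED_PADDING keep the oversized legacy rounding so existing volumes stay readable; only tables created with the new fix_padding argument use the tighter 4 KiB alignment, which is why sb_set_version() bumps such sets to SB_VERSION_4.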
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ecefe6703736..8d07fdf63a47 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -90,7 +90,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
struct linear_c *lc = ti->private;
bio_set_dev(bio, lc->dev->bdev);
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
bio->bi_iter.bi_sector =
linear_map_sector(ti, bio->bi_iter.bi_sector);
}
@@ -136,21 +136,15 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
}
#ifdef CONFIG_BLK_DEV_ZONED
-static int linear_report_zones(struct dm_target *ti, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+static int linear_report_zones(struct dm_target *ti,
+ struct dm_report_zones_args *args, unsigned int nr_zones)
{
- struct linear_c *lc = (struct linear_c *) ti->private;
- int ret;
-
- /* Do report and remap it */
- ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
- zones, nr_zones);
- if (ret != 0)
- return ret;
+ struct linear_c *lc = ti->private;
+ sector_t sector = linear_map_sector(ti, args->next_sector);
- if (*nr_zones)
- dm_remap_zone_report(ti, lc->start, zones, nr_zones);
- return 0;
+ args->start = lc->start;
+ return blkdev_report_zones(lc->dev->bdev, sector, nr_zones,
+ dm_report_zones_cb, args);
}
#endif
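
    Both the flakey and linear conversions above follow the same shape: the target resolves the start sector for the request, records its own start offset in the args structure, and lets the generic dm_report_zones_cb() remap each zone reported by blkdev_report_zones(). A sketch of the pattern for a hypothetical target (everything named example_* is illustrative; only the dm/block-layer calls come from the hunks above):

    #ifdef CONFIG_BLK_DEV_ZONED
    static int example_report_zones(struct dm_target *ti,
                                    struct dm_report_zones_args *args,
                                    unsigned int nr_zones)
    {
            struct example_c *ec = ti->private;   /* hypothetical per-target context */
            sector_t sector = example_map_sector(ti, args->next_sector);

            /* Tell dm_report_zones_cb() where this target begins on the backing device. */
            args->start = ec->start;

            /* Returns the number of zones reported, or a negative errno. */
            return blkdev_report_zones(ec->dev->bdev, sector, nr_zones,
                                       dm_report_zones_cb, args);
    }
    #endif

    dm core (see the dm_blk_report_zones() hunk in dm.c further down) keeps invoking this method in a loop until nr_zones zones have been reported or args->next_sector passes the capacity of the mapped device.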
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b0aa595e4375..c412eaa975fc 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -209,6 +209,7 @@ struct raid_dev {
#define RT_FLAG_RS_SUSPENDED 5
#define RT_FLAG_RS_IN_SYNC 6
#define RT_FLAG_RS_RESYNCING 7
+#define RT_FLAG_RS_GROW 8
/* Array elements of 64 bit needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -241,6 +242,9 @@ struct raid_set {
struct raid_type *raid_type;
struct dm_target_callbacks callbacks;
+ sector_t array_sectors;
+ sector_t dev_sectors;
+
/* Optional raid4/5/6 journal device */
struct journal_dev {
struct dm_dev *dev;
@@ -616,7 +620,6 @@ static int raid10_format_to_md_layout(struct raid_set *rs,
} else if (algorithm == ALGORITHM_RAID10_FAR) {
f = copies;
- r = !RAID10_OFFSET;
if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
r |= RAID10_USE_FAR_SETS;
@@ -1615,13 +1618,12 @@ static int _check_data_dev_sectors(struct raid_set *rs)
}
/* Calculate the sectors per device and per array used for @rs */
-static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
+static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
{
int delta_disks;
unsigned int data_stripes;
+ sector_t array_sectors = sectors, dev_sectors = sectors;
struct mddev *mddev = &rs->md;
- struct md_rdev *rdev;
- sector_t array_sectors = rs->ti->len, dev_sectors = rs->ti->len;
if (use_mddev) {
delta_disks = mddev->delta_disks;
@@ -1656,12 +1658,9 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
/* Striped layouts */
array_sectors = (data_stripes + delta_disks) * dev_sectors;
- rdev_for_each(rdev, mddev)
- if (!test_bit(Journal, &rdev->flags))
- rdev->sectors = dev_sectors;
-
mddev->array_sectors = array_sectors;
mddev->dev_sectors = dev_sectors;
+ rs_set_rdev_sectors(rs);
return _check_data_dev_sectors(rs);
bad:
@@ -1670,7 +1669,7 @@ bad:
}
/* Setup recovery on @rs */
-static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
+static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
{
/* raid0 does not recover */
if (rs_is_raid0(rs))
@@ -1691,22 +1690,6 @@ static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
? MaxSector : dev_sectors;
}
-/* Setup recovery on @rs based on raid type, device size and 'nosync' flag */
-static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
-{
- if (!dev_sectors)
- /* New raid set or 'sync' flag provided */
- __rs_setup_recovery(rs, 0);
- else if (dev_sectors == MaxSector)
- /* Prevent recovery */
- __rs_setup_recovery(rs, MaxSector);
- else if (__rdev_sectors(rs) < dev_sectors)
- /* Grown raid set */
- __rs_setup_recovery(rs, __rdev_sectors(rs));
- else
- __rs_setup_recovery(rs, MaxSector);
-}
-
static void do_table_event(struct work_struct *ws)
{
struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
@@ -2474,7 +2457,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
return -EINVAL;
}
- /* Enable bitmap creation for RAID levels != 0 */
+ /* Enable bitmap creation on @rs unless no metadevs or raid0 or journaled raid4/5/6 set. */
mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
@@ -2911,7 +2894,7 @@ static int rs_setup_reshape(struct raid_set *rs)
/* Remove disk(s) */
} else if (rs->delta_disks < 0) {
- r = rs_set_dev_and_array_sectors(rs, true);
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true);
mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */
/* Change layout and/or chunk size */
@@ -3008,7 +2991,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
bool resize = false;
struct raid_type *rt;
unsigned int num_raid_params, num_raid_devs;
- sector_t calculated_dev_sectors, rdev_sectors, reshape_sectors;
+ sector_t sb_array_sectors, rdev_sectors, reshape_sectors;
struct raid_set *rs = NULL;
const char *arg;
struct rs_layout rs_layout;
@@ -3067,11 +3050,13 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
*
* Any existing superblock will overwrite the array and device sizes
*/
- r = rs_set_dev_and_array_sectors(rs, false);
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
if (r)
goto bad;
- calculated_dev_sectors = rs->md.dev_sectors;
+ /* Memorize the just-calculated, potentially larger sizes to grow the raid set in preresume */
+ rs->array_sectors = rs->md.array_sectors;
+ rs->dev_sectors = rs->md.dev_sectors;
/*
* Backup any new raid set level, layout, ...
@@ -3084,6 +3069,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
goto bad;
+ /* All in-core metadata now as of current superblocks after calling analyse_superblocks() */
+ sb_array_sectors = rs->md.array_sectors;
rdev_sectors = __rdev_sectors(rs);
if (!rdev_sectors) {
ti->error = "Invalid rdev size";
@@ -3093,8 +3080,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
reshape_sectors = _get_reshape_sectors(rs);
- if (calculated_dev_sectors != rdev_sectors)
- resize = calculated_dev_sectors != (reshape_sectors ? rdev_sectors - reshape_sectors : rdev_sectors);
+ if (rs->dev_sectors != rdev_sectors) {
+ resize = (rs->dev_sectors != rdev_sectors - reshape_sectors);
+ if (rs->dev_sectors > rdev_sectors - reshape_sectors)
+ set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+ }
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
@@ -3121,13 +3111,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
rs_set_new(rs);
} else if (rs_is_recovering(rs)) {
- /* Rebuild particular devices */
- if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
- set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
- rs_setup_recovery(rs, MaxSector);
- }
/* A recovering raid set may be resized */
- ; /* skip setup rs */
+ goto size_check;
} else if (rs_is_reshaping(rs)) {
/* Have to reject size change request during reshape */
if (resize) {
@@ -3171,6 +3156,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
rs_setup_recovery(rs, MaxSector);
rs_set_new(rs);
} else if (rs_reshape_requested(rs)) {
+ /* Only request grow on raid set size extensions, not on reshapes. */
+ clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+
/*
* No need to check for 'ongoing' takeover here, because takeover
 * is an instant operation as opposed to an ongoing reshape.
@@ -3201,13 +3189,31 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
rs_set_cur(rs);
} else {
+size_check:
/* May not set recovery when a device rebuild is requested */
if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
- rs_setup_recovery(rs, MaxSector);
+ clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
- } else
- rs_setup_recovery(rs, test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ?
- 0 : (resize ? calculated_dev_sectors : MaxSector));
+ rs_setup_recovery(rs, MaxSector);
+ } else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+ /*
+ * Set the raid set to its current size, i.e. the size recorded in the
+ * superblocks, so it can be grown to the larger size in preresume.
+ */
+ r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false);
+ if (r)
+ goto bad;
+
+ rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);
+ } else {
+ /* No size change, or the set is shrinking: update the size and record it in the superblocks */
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
+ if (r)
+ goto bad;
+
+ if (sb_array_sectors > rs->array_sectors)
+ set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
+ }
rs_set_cur(rs);
}
@@ -3406,10 +3412,9 @@ static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)
/* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resynching */
static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
- sector_t resync_max_sectors)
+ enum sync_state state, sector_t resync_max_sectors)
{
sector_t r;
- enum sync_state state;
struct mddev *mddev = &rs->md;
clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
@@ -3420,8 +3425,6 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
- state = decipher_sync_action(mddev, recovery);
-
if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
r = mddev->recovery_cp;
else
@@ -3439,18 +3442,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
/*
* In case we are recovering, the array is not in sync
* and health chars should show the recovering legs.
+ *
+ * Already retrieved recovery offset from curr_resync_completed above.
*/
;
- else if (state == st_resync)
- /*
- * If "resync" is occurring, the raid set
- * is or may be out of sync hence the health
- * characters shall be 'a'.
- */
- set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
- else if (state == st_reshape)
+
+ else if (state == st_resync || state == st_reshape)
/*
- * If "reshape" is occurring, the raid set
+ * If "resync/reshape" is occurring, the raid set
* is or may be out of sync hence the health
* characters shall be 'a'.
*/
@@ -3464,22 +3463,22 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
*/
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
- else {
- struct md_rdev *rdev;
-
+ else if (test_bit(MD_RECOVERY_NEEDED, &recovery))
/*
* We are idle and recovery is needed, prevent 'A' chars race
* caused by components still set to in-sync by constructor.
*/
- if (test_bit(MD_RECOVERY_NEEDED, &recovery))
- set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
+ set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
+ else {
/*
- * The raid set may be doing an initial sync, or it may
- * be rebuilding individual components. If all the
- * devices are In_sync, then it is the raid set that is
- * being initialized.
+ * We are idle and the raid set may be doing an initial
+ * sync, or it may be rebuilding individual components.
+ * If all the devices are In_sync, then it is the raid set
+ * that is being initialized.
*/
+ struct md_rdev *rdev;
+
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
rdev_for_each(rdev, mddev)
if (!test_bit(Journal, &rdev->flags) &&
@@ -3512,7 +3511,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
unsigned int rebuild_disks;
unsigned int write_mostly_params = 0;
sector_t progress, resync_max_sectors, resync_mismatches;
- const char *sync_action;
+ enum sync_state state;
struct raid_type *rt;
switch (type) {
@@ -3526,14 +3525,14 @@ static void raid_status(struct dm_target *ti, status_type_t type,
/* Access most recent mddev properties for status output */
smp_rmb();
- recovery = rs->md.recovery;
/* Get sensible max sectors even if raid set not yet started */
resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?
mddev->resync_max_sectors : mddev->dev_sectors;
- progress = rs_get_progress(rs, recovery, resync_max_sectors);
+ recovery = rs->md.recovery;
+ state = decipher_sync_action(mddev, recovery);
+ progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
atomic64_read(&mddev->resync_mismatches) : 0;
- sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
for (i = 0; i < rs->raid_disks; i++)
@@ -3561,7 +3560,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
* See Documentation/admin-guide/device-mapper/dm-raid.rst for
* information on each of these states.
*/
- DMEMIT(" %s", sync_action);
+ DMEMIT(" %s", sync_str(state));
/*
* v1.5.0+:
@@ -3955,11 +3954,22 @@ static int raid_preresume(struct dm_target *ti)
if (r)
return r;
- /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
- if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
- mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
- r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors,
- to_bytes(rs->requested_bitmap_chunk_sectors), 0);
+ /* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */
+ if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+ mddev->array_sectors = rs->array_sectors;
+ mddev->dev_sectors = rs->dev_sectors;
+ rs_set_rdev_sectors(rs);
+ rs_set_capacity(rs);
+ }
+
+ /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */
+ if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
+ (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) ||
+ (rs->requested_bitmap_chunk_sectors &&
+ mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
+ int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;
+
+ r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
if (r)
DMERR("Failed to resize bitmap");
}
@@ -3968,8 +3978,10 @@ static int raid_preresume(struct dm_target *ti)
/* Be prepared for mddev_resume() in raid_resume() */
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
- set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
mddev->resync_min = mddev->recovery_cp;
+ if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
+ mddev->resync_max_sectors = mddev->dev_sectors;
}
/* Check for any reshape request unless new raid set */
@@ -4017,7 +4029,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 14, 0},
+ .version = {1, 15, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
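
    The grow support added above hinges on comparing the size computed from the new table line against the size recorded in the component superblocks. A small worked example with hypothetical numbers, assuming no out-of-place reshape space is reserved (reshape_sectors == 0):

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical sizes in 512-byte sectors. */
            unsigned long long rdev_sectors = 2097152;    /* 1 GiB per leg, from the superblocks */
            unsigned long long dev_sectors = 4194304;     /* 2 GiB per leg, from the new table */
            unsigned long long reshape_sectors = 0;

            bool resize = dev_sectors != rdev_sectors - reshape_sectors;
            bool grow = dev_sectors > rdev_sectors - reshape_sectors;

            /* resize=1 grow=1 -> RT_FLAG_RS_GROW gets set in raid_ctr() */
            printf("resize=%d grow=%d\n", resize, grow);
            return 0;
    }

    In that case the constructor first sizes the set to the superblock value and defers the actual extension to raid_preresume(), which restores the larger mddev/rdev sizes, resizes the bitmap, and raises resync_max_sectors so recovery covers the added space.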
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 8547d7594338..63bbcc20f49a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -55,19 +55,6 @@ static void trigger_event(struct work_struct *work)
dm_table_event(sc->ti->table);
}
-static inline struct stripe_c *alloc_context(unsigned int stripes)
-{
- size_t len;
-
- if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
- stripes))
- return NULL;
-
- len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
-
- return kmalloc(len, GFP_KERNEL);
-}
-
/*
* Parse a single <dev> <sector> pair
*/
@@ -142,7 +129,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -EINVAL;
}
- sc = alloc_context(stripes);
+ sc = kmalloc(struct_size(sc, stripe, stripes), GFP_KERNEL);
if (!sc) {
ti->error = "Memory allocation for striped context "
"failed";
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 52e049554f5c..2ae0c1913766 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -918,21 +918,15 @@ bool dm_table_supports_dax(struct dm_table *t,
static bool dm_table_does_not_support_partial_completion(struct dm_table *t);
-struct verify_rq_based_data {
- unsigned sq_count;
- unsigned mq_count;
-};
-
-static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
+static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
{
- struct request_queue *q = bdev_get_queue(dev->bdev);
- struct verify_rq_based_data *v = data;
+ struct block_device *bdev = dev->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
- if (queue_is_mq(q))
- v->mq_count++;
- else
- v->sq_count++;
+ /* request-based cannot stack on partitions! */
+ if (bdev != bdev->bd_contains)
+ return false;
return queue_is_mq(q);
}
@@ -941,7 +935,6 @@ static int dm_table_determine_type(struct dm_table *t)
{
unsigned i;
unsigned bio_based = 0, request_based = 0, hybrid = 0;
- struct verify_rq_based_data v = {.sq_count = 0, .mq_count = 0};
struct dm_target *tgt;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
@@ -1045,14 +1038,10 @@ verify_rq_based:
/* Non-request-stackable devices can't be used for request-based dm */
if (!tgt->type->iterate_devices ||
- !tgt->type->iterate_devices(tgt, device_is_rq_based, &v)) {
+ !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
- if (v.sq_count > 0) {
- DMERR("table load rejected: not all devices are blk-mq request-stackable");
- return -EINVAL;
- }
return 0;
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fcd887703f95..5a2c494cb552 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -609,13 +609,12 @@ static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
blk_status_t error)
{
struct bio_list bios;
- unsigned long flags;
bio_list_init(&bios);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
__merge_bio_list(&bios, master);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
error_bio_list(&bios, error);
}
@@ -623,15 +622,14 @@ static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
static void requeue_deferred_cells(struct thin_c *tc)
{
struct pool *pool = tc->pool;
- unsigned long flags;
struct list_head cells;
struct dm_bio_prison_cell *cell, *tmp;
INIT_LIST_HEAD(&cells);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
list_splice_init(&tc->deferred_cells, &cells);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
list_for_each_entry_safe(cell, tmp, &cells, user_list)
cell_requeue(pool, cell);
@@ -640,14 +638,13 @@ static void requeue_deferred_cells(struct thin_c *tc)
static void requeue_io(struct thin_c *tc)
{
struct bio_list bios;
- unsigned long flags;
bio_list_init(&bios);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
__merge_bio_list(&bios, &tc->deferred_bio_list);
__merge_bio_list(&bios, &tc->retry_on_resume_list);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
error_bio_list(&bios, BLK_STS_DM_REQUEUE);
requeue_deferred_cells(tc);
@@ -756,7 +753,6 @@ static void inc_all_io_entry(struct pool *pool, struct bio *bio)
static void issue(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
- unsigned long flags;
if (!bio_triggers_commit(tc, bio)) {
generic_make_request(bio);
@@ -777,9 +773,9 @@ static void issue(struct thin_c *tc, struct bio *bio)
* Batch together any bios that trigger commits and then issue a
* single commit for them in process_deferred_bios().
*/
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
bio_list_add(&pool->deferred_flush_bios, bio);
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
}
static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
@@ -886,12 +882,15 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c
{
struct pool *pool = tc->pool;
unsigned long flags;
+ int has_work;
spin_lock_irqsave(&tc->lock, flags);
cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
+ has_work = !bio_list_empty(&tc->deferred_bio_list);
spin_unlock_irqrestore(&tc->lock, flags);
- wake_worker(pool);
+ if (has_work)
+ wake_worker(pool);
}
static void thin_defer_bio(struct thin_c *tc, struct bio *bio);
@@ -960,7 +959,6 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
- unsigned long flags;
/*
* If the bio has the REQ_FUA flag set we must commit the metadata
@@ -985,9 +983,9 @@ static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
* Batch together any bios that trigger commits and then issue a
* single commit for them in process_deferred_bios().
*/
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
bio_list_add(&pool->deferred_flush_completions, bio);
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
}
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
@@ -1226,14 +1224,13 @@ static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
static void process_prepared(struct pool *pool, struct list_head *head,
process_mapping_fn *fn)
{
- unsigned long flags;
struct list_head maps;
struct dm_thin_new_mapping *m, *tmp;
INIT_LIST_HEAD(&maps);
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
list_splice_init(head, &maps);
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
list_for_each_entry_safe(m, tmp, &maps, list)
(*fn)(m);
@@ -1510,14 +1507,12 @@ static int commit(struct pool *pool)
static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
{
- unsigned long flags;
-
if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
DMWARN("%s: reached low water mark for data device: sending event.",
dm_device_name(pool->pool_md));
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
pool->low_water_triggered = true;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
dm_table_event(pool->ti->table);
}
}
@@ -1593,11 +1588,10 @@ static void retry_on_resume(struct bio *bio)
{
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct thin_c *tc = h->tc;
- unsigned long flags;
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
bio_list_add(&tc->retry_on_resume_list, bio);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
}
static blk_status_t should_error_unserviceable_bio(struct pool *pool)
@@ -2170,7 +2164,6 @@ static void __sort_thin_deferred_bios(struct thin_c *tc)
static void process_thin_deferred_bios(struct thin_c *tc)
{
struct pool *pool = tc->pool;
- unsigned long flags;
struct bio *bio;
struct bio_list bios;
struct blk_plug plug;
@@ -2184,10 +2177,10 @@ static void process_thin_deferred_bios(struct thin_c *tc)
bio_list_init(&bios);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
if (bio_list_empty(&tc->deferred_bio_list)) {
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
return;
}
@@ -2196,7 +2189,7 @@ static void process_thin_deferred_bios(struct thin_c *tc)
bio_list_merge(&bios, &tc->deferred_bio_list);
bio_list_init(&tc->deferred_bio_list);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
@@ -2206,10 +2199,10 @@ static void process_thin_deferred_bios(struct thin_c *tc)
* prepared mappings to process.
*/
if (ensure_next_mapping(pool)) {
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
bio_list_add(&tc->deferred_bio_list, bio);
bio_list_merge(&tc->deferred_bio_list, &bios);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
break;
}
@@ -2264,16 +2257,15 @@ static unsigned sort_cells(struct pool *pool, struct list_head *cells)
static void process_thin_deferred_cells(struct thin_c *tc)
{
struct pool *pool = tc->pool;
- unsigned long flags;
struct list_head cells;
struct dm_bio_prison_cell *cell;
unsigned i, j, count;
INIT_LIST_HEAD(&cells);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
list_splice_init(&tc->deferred_cells, &cells);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
if (list_empty(&cells))
return;
@@ -2294,9 +2286,9 @@ static void process_thin_deferred_cells(struct thin_c *tc)
for (j = i; j < count; j++)
list_add(&pool->cell_sort_array[j]->user_list, &cells);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
list_splice(&cells, &tc->deferred_cells);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
return;
}
@@ -2349,7 +2341,6 @@ static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
static void process_deferred_bios(struct pool *pool)
{
- unsigned long flags;
struct bio *bio;
struct bio_list bios, bio_completions;
struct thin_c *tc;
@@ -2368,13 +2359,13 @@ static void process_deferred_bios(struct pool *pool)
bio_list_init(&bios);
bio_list_init(&bio_completions);
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
bio_list_merge(&bios, &pool->deferred_flush_bios);
bio_list_init(&pool->deferred_flush_bios);
bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
bio_list_init(&pool->deferred_flush_completions);
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
!(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
@@ -2657,12 +2648,11 @@ static void metadata_operation_failed(struct pool *pool, const char *op, int r)
*/
static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
{
- unsigned long flags;
struct pool *pool = tc->pool;
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
bio_list_add(&tc->deferred_bio_list, bio);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
wake_worker(pool);
}
@@ -2678,13 +2668,12 @@ static void thin_defer_bio_with_throttle(struct thin_c *tc, struct bio *bio)
static void thin_defer_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
- unsigned long flags;
struct pool *pool = tc->pool;
throttle_lock(&pool->throttle);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
list_add_tail(&cell->user_list, &tc->deferred_cells);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
throttle_unlock(&pool->throttle);
wake_worker(pool);
@@ -2810,15 +2799,14 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
static void requeue_bios(struct pool *pool)
{
- unsigned long flags;
struct thin_c *tc;
rcu_read_lock();
list_for_each_entry_rcu(tc, &pool->active_thins, list) {
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
bio_list_init(&tc->retry_on_resume_list);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
}
rcu_read_unlock();
}
@@ -3412,15 +3400,14 @@ static int pool_map(struct dm_target *ti, struct bio *bio)
int r;
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- unsigned long flags;
/*
* As this is a singleton target, ti->begin is always zero.
*/
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
bio_set_dev(bio, pt->data_dev->bdev);
r = DM_MAPIO_REMAPPED;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
return r;
}
@@ -3591,7 +3578,6 @@ static void pool_resume(struct dm_target *ti)
{
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- unsigned long flags;
/*
* Must requeue active_thins' bios and then resume
@@ -3600,10 +3586,10 @@ static void pool_resume(struct dm_target *ti)
requeue_bios(pool);
pool_resume_active_thins(pool);
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
pool->low_water_triggered = false;
pool->suspended = false;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
do_waker(&pool->waker.work);
}
@@ -3612,11 +3598,10 @@ static void pool_presuspend(struct dm_target *ti)
{
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- unsigned long flags;
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
pool->suspended = true;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
pool_suspend_active_thins(pool);
}
@@ -3625,13 +3610,12 @@ static void pool_presuspend_undo(struct dm_target *ti)
{
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- unsigned long flags;
pool_resume_active_thins(pool);
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
pool->suspended = false;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
}
static void pool_postsuspend(struct dm_target *ti)
@@ -4110,11 +4094,10 @@ static void thin_put(struct thin_c *tc)
static void thin_dtr(struct dm_target *ti)
{
struct thin_c *tc = ti->private;
- unsigned long flags;
- spin_lock_irqsave(&tc->pool->lock, flags);
+ spin_lock_irq(&tc->pool->lock);
list_del_rcu(&tc->list);
- spin_unlock_irqrestore(&tc->pool->lock, flags);
+ spin_unlock_irq(&tc->pool->lock);
synchronize_rcu();
thin_put(tc);
@@ -4150,7 +4133,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
struct thin_c *tc;
struct dm_dev *pool_dev, *origin_dev;
struct mapped_device *pool_md;
- unsigned long flags;
mutex_lock(&dm_thin_pool_table.mutex);
@@ -4244,9 +4226,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
mutex_unlock(&dm_thin_pool_table.mutex);
- spin_lock_irqsave(&tc->pool->lock, flags);
+ spin_lock_irq(&tc->pool->lock);
if (tc->pool->suspended) {
- spin_unlock_irqrestore(&tc->pool->lock, flags);
+ spin_unlock_irq(&tc->pool->lock);
mutex_lock(&dm_thin_pool_table.mutex); /* reacquire for __pool_dec */
ti->error = "Unable to activate thin device while pool is suspended";
r = -EINVAL;
@@ -4255,7 +4237,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
refcount_set(&tc->refcount, 1);
init_completion(&tc->can_destroy);
list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
- spin_unlock_irqrestore(&tc->pool->lock, flags);
+ spin_unlock_irq(&tc->pool->lock);
/*
* This synchronize_rcu() call is needed here otherwise we risk a
* wake_worker() call finding no bios to process (because the newly
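
    The long run of dm-thin hunks above is one mechanical substitution: spin_lock_irqsave()/spin_unlock_irqrestore() becomes spin_lock_irq()/spin_unlock_irq() and the flags variable is dropped, which is only correct when these paths always run with interrupts enabled. A condensed view of the constraint, based on the thin_defer_bio() hunk above:

    /* spin_unlock_irq() re-enables interrupts unconditionally, so this form
     * is safe only for callers that never run with interrupts already
     * disabled (worker threads, the bio mapping path).  A caller that might
     * hold interrupts off must keep the *_irqsave/irqrestore pair. */
    spin_lock_irq(&tc->lock);
    bio_list_add(&tc->deferred_bio_list, bio);
    spin_unlock_irq(&tc->lock);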
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index d06b8aa41e26..7d727a72aa13 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1218,7 +1218,8 @@ bio_copy:
}
} while (bio->bi_iter.bi_size);
- if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks))
+ if (unlikely(bio->bi_opf & REQ_FUA ||
+ wc->uncommitted_blocks >= wc->autocommit_blocks))
writecache_flush(wc);
else
writecache_schedule_autocommit(wc);
@@ -1561,7 +1562,7 @@ static void writecache_writeback(struct work_struct *work)
{
struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work);
struct blk_plug plug;
- struct wc_entry *f, *g, *e = NULL;
+ struct wc_entry *f, *uninitialized_var(g), *e = NULL;
struct rb_node *node, *next_node;
struct list_head skipped;
struct writeback_list wbl;
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 595a73110e17..22b3cb0050a7 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -554,6 +554,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
TASK_UNINTERRUPTIBLE);
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
dmz_release_mblock(zmd, mblk);
+ dmz_check_bdev(zmd->dev);
return ERR_PTR(-EIO);
}
@@ -625,6 +626,8 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
ret = submit_bio_wait(bio);
bio_put(bio);
+ if (ret)
+ dmz_check_bdev(zmd->dev);
return ret;
}
@@ -691,6 +694,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
TASK_UNINTERRUPTIBLE);
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
clear_bit(DMZ_META_ERROR, &mblk->state);
+ dmz_check_bdev(zmd->dev);
ret = -EIO;
}
nr_mblks_submitted--;
@@ -768,7 +772,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
/* If there are no dirty metadata blocks, just flush the device cache */
if (list_empty(&write_list)) {
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
- goto out;
+ goto err;
}
/*
@@ -778,7 +782,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
*/
ret = dmz_log_dirty_mblocks(zmd, &write_list);
if (ret)
- goto out;
+ goto err;
/*
* The log is on disk. It is now safe to update in place
@@ -786,11 +790,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
*/
ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary);
if (ret)
- goto out;
+ goto err;
ret = dmz_write_sb(zmd, zmd->mblk_primary);
if (ret)
- goto out;
+ goto err;
while (!list_empty(&write_list)) {
mblk = list_first_entry(&write_list, struct dmz_mblock, link);
@@ -805,16 +809,20 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
zmd->sb_gen++;
out:
- if (ret && !list_empty(&write_list)) {
- spin_lock(&zmd->mblk_lock);
- list_splice(&write_list, &zmd->mblk_dirty_list);
- spin_unlock(&zmd->mblk_lock);
- }
-
dmz_unlock_flush(zmd);
up_write(&zmd->mblk_sem);
return ret;
+
+err:
+ if (!list_empty(&write_list)) {
+ spin_lock(&zmd->mblk_lock);
+ list_splice(&write_list, &zmd->mblk_dirty_list);
+ spin_unlock(&zmd->mblk_lock);
+ }
+ if (!dmz_check_bdev(zmd->dev))
+ ret = -EIO;
+ goto out;
}
/*
@@ -1080,9 +1088,10 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
/*
* Initialize a zone descriptor.
*/
-static int dmz_init_zone(struct dmz_metadata *zmd, struct dm_zone *zone,
- struct blk_zone *blkz)
+static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data)
{
+ struct dmz_metadata *zmd = data;
+ struct dm_zone *zone = &zmd->zones[idx];
struct dmz_dev *dev = zmd->dev;
/* Ignore the eventual last runt (smaller) zone */
@@ -1096,26 +1105,29 @@ static int dmz_init_zone(struct dmz_metadata *zmd, struct dm_zone *zone,
atomic_set(&zone->refcount, 0);
zone->chunk = DMZ_MAP_UNMAPPED;
- if (blkz->type == BLK_ZONE_TYPE_CONVENTIONAL) {
+ switch (blkz->type) {
+ case BLK_ZONE_TYPE_CONVENTIONAL:
set_bit(DMZ_RND, &zone->flags);
zmd->nr_rnd_zones++;
- } else if (blkz->type == BLK_ZONE_TYPE_SEQWRITE_REQ ||
- blkz->type == BLK_ZONE_TYPE_SEQWRITE_PREF) {
+ break;
+ case BLK_ZONE_TYPE_SEQWRITE_REQ:
+ case BLK_ZONE_TYPE_SEQWRITE_PREF:
set_bit(DMZ_SEQ, &zone->flags);
- } else
+ break;
+ default:
return -ENXIO;
-
- if (blkz->cond == BLK_ZONE_COND_OFFLINE)
- set_bit(DMZ_OFFLINE, &zone->flags);
- else if (blkz->cond == BLK_ZONE_COND_READONLY)
- set_bit(DMZ_READ_ONLY, &zone->flags);
+ }
if (dmz_is_rnd(zone))
zone->wp_block = 0;
else
zone->wp_block = dmz_sect2blk(blkz->wp - blkz->start);
- if (!dmz_is_offline(zone) && !dmz_is_readonly(zone)) {
+ if (blkz->cond == BLK_ZONE_COND_OFFLINE)
+ set_bit(DMZ_OFFLINE, &zone->flags);
+ else if (blkz->cond == BLK_ZONE_COND_READONLY)
+ set_bit(DMZ_READ_ONLY, &zone->flags);
+ else {
zmd->nr_useable_zones++;
if (dmz_is_rnd(zone)) {
zmd->nr_rnd_zones++;
@@ -1139,23 +1151,13 @@ static void dmz_drop_zones(struct dmz_metadata *zmd)
}
/*
- * The size of a zone report in number of zones.
- * This results in 4096*64B=256KB report zones commands.
- */
-#define DMZ_REPORT_NR_ZONES 4096
-
-/*
* Allocate and initialize zone descriptors using the zone
* information from disk.
*/
static int dmz_init_zones(struct dmz_metadata *zmd)
{
struct dmz_dev *dev = zmd->dev;
- struct dm_zone *zone;
- struct blk_zone *blkz;
- unsigned int nr_blkz;
- sector_t sector = 0;
- int i, ret = 0;
+ int ret;
/* Init */
zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3;
@@ -1169,54 +1171,38 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
dmz_dev_info(dev, "Using %zu B for zone information",
sizeof(struct dm_zone) * dev->nr_zones);
- /* Get zone information */
- nr_blkz = DMZ_REPORT_NR_ZONES;
- blkz = kcalloc(nr_blkz, sizeof(struct blk_zone), GFP_KERNEL);
- if (!blkz) {
- ret = -ENOMEM;
- goto out;
- }
-
/*
- * Get zone information and initialize zone descriptors.
- * At the same time, determine where the super block
- * should be: first block of the first randomly writable
- * zone.
+ * Get zone information and initialize zone descriptors. At the same
+ * time, determine where the super block should be: first block of the
+ * first randomly writable zone.
*/
- zone = zmd->zones;
- while (sector < dev->capacity) {
- /* Get zone information */
- nr_blkz = DMZ_REPORT_NR_ZONES;
- ret = blkdev_report_zones(dev->bdev, sector, blkz, &nr_blkz);
- if (ret) {
- dmz_dev_err(dev, "Report zones failed %d", ret);
- goto out;
- }
+ ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES, dmz_init_zone,
+ zmd);
+ if (ret < 0) {
+ dmz_drop_zones(zmd);
+ return ret;
+ }
- if (!nr_blkz)
- break;
+ return 0;
+}
- /* Process report */
- for (i = 0; i < nr_blkz; i++) {
- ret = dmz_init_zone(zmd, zone, &blkz[i]);
- if (ret)
- goto out;
- sector += dev->zone_nr_sectors;
- zone++;
- }
- }
+static int dmz_update_zone_cb(struct blk_zone *blkz, unsigned int idx,
+ void *data)
+{
+ struct dm_zone *zone = data;
- /* The entire zone configuration of the disk should now be known */
- if (sector < dev->capacity) {
- dmz_dev_err(dev, "Failed to get correct zone information");
- ret = -ENXIO;
- }
-out:
- kfree(blkz);
- if (ret)
- dmz_drop_zones(zmd);
+ clear_bit(DMZ_OFFLINE, &zone->flags);
+ clear_bit(DMZ_READ_ONLY, &zone->flags);
+ if (blkz->cond == BLK_ZONE_COND_OFFLINE)
+ set_bit(DMZ_OFFLINE, &zone->flags);
+ else if (blkz->cond == BLK_ZONE_COND_READONLY)
+ set_bit(DMZ_READ_ONLY, &zone->flags);
- return ret;
+ if (dmz_is_seq(zone))
+ zone->wp_block = dmz_sect2blk(blkz->wp - blkz->start);
+ else
+ zone->wp_block = 0;
+ return 0;
}
/*
@@ -1224,9 +1210,7 @@ out:
*/
static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- unsigned int nr_blkz = 1;
unsigned int noio_flag;
- struct blk_zone blkz;
int ret;
/*
@@ -1236,29 +1220,19 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
* GFP_NOIO was specified.
*/
noio_flag = memalloc_noio_save();
- ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
- &blkz, &nr_blkz);
+ ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone), 1,
+ dmz_update_zone_cb, zone);
memalloc_noio_restore(noio_flag);
- if (!nr_blkz)
+
+ if (ret == 0)
ret = -EIO;
- if (ret) {
+ if (ret < 0) {
dmz_dev_err(zmd->dev, "Get zone %u report failed",
dmz_id(zmd, zone));
+ dmz_check_bdev(zmd->dev);
return ret;
}
- clear_bit(DMZ_OFFLINE, &zone->flags);
- clear_bit(DMZ_READ_ONLY, &zone->flags);
- if (blkz.cond == BLK_ZONE_COND_OFFLINE)
- set_bit(DMZ_OFFLINE, &zone->flags);
- else if (blkz.cond == BLK_ZONE_COND_READONLY)
- set_bit(DMZ_READ_ONLY, &zone->flags);
-
- if (dmz_is_seq(zone))
- zone->wp_block = dmz_sect2blk(blkz.wp - blkz.start);
- else
- zone->wp_block = 0;
-
return 0;
}
@@ -1312,9 +1286,9 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
struct dmz_dev *dev = zmd->dev;
- ret = blkdev_reset_zones(dev->bdev,
- dmz_start_sect(zmd, zone),
- dev->zone_nr_sectors, GFP_NOIO);
+ ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
+ dmz_start_sect(zmd, zone),
+ dev->zone_nr_sectors, GFP_NOIO);
if (ret) {
dmz_dev_err(dev, "Reset zone %u failed %d",
dmz_id(zmd, zone), ret);
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index d240d7ca8a8a..e7ace908a9b7 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -82,6 +82,7 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
"Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
dmz_id(zmd, zone), (unsigned long long)wp_block,
(unsigned long long)block, nr_blocks, ret);
+ dmz_check_bdev(zrc->dev);
return ret;
}
@@ -489,12 +490,7 @@ static void dmz_reclaim_work(struct work_struct *work)
ret = dmz_do_reclaim(zrc);
if (ret) {
dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
- if (ret == -EIO)
- /*
- * LLD might be performing some error handling sequence
- * at the underlying device. To not interfere, do not
- * attempt to schedule the next reclaim run immediately.
- */
+ if (!dmz_check_bdev(zrc->dev))
return;
}
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index d3bcc4197f5d..4574e0dedbd6 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -80,6 +80,8 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
bio->bi_status = status;
+ if (bio->bi_status != BLK_STS_OK)
+ bioctx->target->dev->flags |= DMZ_CHECK_BDEV;
if (refcount_dec_and_test(&bioctx->ref)) {
struct dm_zone *zone = bioctx->zone;
@@ -565,32 +567,52 @@ out:
}
/*
- * Check the backing device availability. If it's on the way out,
+ * Check if the backing device is being removed. If it's on the way out,
* start failing I/O. Reclaim and metadata components also call this
* function to cleanly abort operation in the event of such failure.
*/
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
{
- struct gendisk *disk;
+ if (dmz_dev->flags & DMZ_BDEV_DYING)
+ return true;
- if (!(dmz_dev->flags & DMZ_BDEV_DYING)) {
- disk = dmz_dev->bdev->bd_disk;
- if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
- dmz_dev_warn(dmz_dev, "Backing device queue dying");
- dmz_dev->flags |= DMZ_BDEV_DYING;
- } else if (disk->fops->check_events) {
- if (disk->fops->check_events(disk, 0) &
- DISK_EVENT_MEDIA_CHANGE) {
- dmz_dev_warn(dmz_dev, "Backing device offline");
- dmz_dev->flags |= DMZ_BDEV_DYING;
- }
- }
+ if (dmz_dev->flags & DMZ_CHECK_BDEV)
+ return !dmz_check_bdev(dmz_dev);
+
+ if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
+ dmz_dev_warn(dmz_dev, "Backing device queue dying");
+ dmz_dev->flags |= DMZ_BDEV_DYING;
}
return dmz_dev->flags & DMZ_BDEV_DYING;
}
/*
+ * Check the backing device availability. This detects such events as
+ * backing device going offline due to errors, media removals, etc.
+ * This check is less efficient than dmz_bdev_is_dying() and should
+ * only be performed as a part of error handling.
+ */
+bool dmz_check_bdev(struct dmz_dev *dmz_dev)
+{
+ struct gendisk *disk;
+
+ dmz_dev->flags &= ~DMZ_CHECK_BDEV;
+
+ if (dmz_bdev_is_dying(dmz_dev))
+ return false;
+
+ disk = dmz_dev->bdev->bd_disk;
+ if (disk->fops->check_events &&
+ disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) {
+ dmz_dev_warn(dmz_dev, "Backing device offline");
+ dmz_dev->flags |= DMZ_BDEV_DYING;
+ }
+
+ return !(dmz_dev->flags & DMZ_BDEV_DYING);
+}
+
+/*
* Process a new BIO.
*/
static int dmz_map(struct dm_target *ti, struct bio *bio)
@@ -902,8 +924,8 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct dmz_target *dmz = ti->private;
- if (dmz_bdev_is_dying(dmz->dev))
- return -ENODEV;
+ if (!dmz_check_bdev(dmz->dev))
+ return -EIO;
*bdev = dmz->dev->bdev;
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index d8e70b0ade35..5b5e493d479c 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -72,6 +72,7 @@ struct dmz_dev {
/* Device flags. */
#define DMZ_BDEV_DYING (1 << 0)
+#define DMZ_CHECK_BDEV (2 << 0)
/*
* Zone descriptor.
@@ -255,5 +256,6 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
* Functions defined in dm-zoned-target.c
*/
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
+bool dmz_check_bdev(struct dmz_dev *dmz_dev);
#endif /* DM_ZONED_H */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1a5e328c443a..e8f9661a10a1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -440,14 +440,48 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
+#ifdef CONFIG_BLK_DEV_ZONED
+int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data)
+{
+ struct dm_report_zones_args *args = data;
+ sector_t sector_diff = args->tgt->begin - args->start;
+
+ /*
+ * Ignore zones beyond the target range.
+ */
+ if (zone->start >= args->start + args->tgt->len)
+ return 0;
+
+ /*
+ * Remap the start sector and write pointer position of the zone
+ * to match its position in the target range.
+ */
+ zone->start += sector_diff;
+ if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
+ if (zone->cond == BLK_ZONE_COND_FULL)
+ zone->wp = zone->start + zone->len;
+ else if (zone->cond == BLK_ZONE_COND_EMPTY)
+ zone->wp = zone->start;
+ else
+ zone->wp += sector_diff;
+ }
+
+ args->next_sector = zone->start + zone->len;
+ return args->orig_cb(zone, args->zone_idx++, args->orig_data);
+}
+EXPORT_SYMBOL_GPL(dm_report_zones_cb);
+
static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+ unsigned int nr_zones, report_zones_cb cb, void *data)
{
-#ifdef CONFIG_BLK_DEV_ZONED
struct mapped_device *md = disk->private_data;
- struct dm_target *tgt;
struct dm_table *map;
int srcu_idx, ret;
+ struct dm_report_zones_args args = {
+ .next_sector = sector,
+ .orig_data = data,
+ .orig_cb = cb,
+ };
if (dm_suspended_md(md))
return -EAGAIN;
@@ -456,38 +490,30 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
if (!map)
return -EIO;
- tgt = dm_table_find_target(map, sector);
- if (!tgt) {
- ret = -EIO;
- goto out;
- }
+ do {
+ struct dm_target *tgt;
- /*
- * If we are executing this, we already know that the block device
- * is a zoned device and so each target should have support for that
- * type of drive. A missing report_zones method means that the target
- * driver has a problem.
- */
- if (WARN_ON(!tgt->type->report_zones)) {
- ret = -EIO;
- goto out;
- }
+ tgt = dm_table_find_target(map, args.next_sector);
+ if (WARN_ON_ONCE(!tgt->type->report_zones)) {
+ ret = -EIO;
+ goto out;
+ }
- /*
- * blkdev_report_zones() will loop and call this again to cover all the
- * zones of the target, eventually moving on to the next target.
- * So there is no need to loop here trying to fill the entire array
- * of zones.
- */
- ret = tgt->type->report_zones(tgt, sector, zones, nr_zones);
+ args.tgt = tgt;
+ ret = tgt->type->report_zones(tgt, &args, nr_zones);
+ if (ret < 0)
+ goto out;
+ } while (args.zone_idx < nr_zones &&
+ args.next_sector < get_capacity(disk));
+ ret = args.zone_idx;
out:
dm_put_live_table(md, srcu_idx);
return ret;
-#else
- return -ENOTSUPP;
-#endif
}
+#else
+#define dm_blk_report_zones NULL
+#endif /* CONFIG_BLK_DEV_ZONED */
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
@@ -1174,7 +1200,8 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
- * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
+ * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
+ * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH.
*
* dm_accept_partial_bio informs the dm that the target only wants to process
* additional n_sectors sectors of the bio and the rest of the data should be
@@ -1212,54 +1239,6 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
-/*
- * The zone descriptors obtained with a zone report indicate
- * zone positions within the underlying device of the target. The zone
- * descriptors must be remapped to match their position within the dm device.
- * The caller target should obtain the zones information using
- * blkdev_report_zones() to ensure that remapping for partition offset is
- * already handled.
- */
-void dm_remap_zone_report(struct dm_target *ti, sector_t start,
- struct blk_zone *zones, unsigned int *nr_zones)
-{
-#ifdef CONFIG_BLK_DEV_ZONED
- struct blk_zone *zone;
- unsigned int nrz = *nr_zones;
- int i;
-
- /*
- * Remap the start sector and write pointer position of the zones in
- * the array. Since we may have obtained from the target underlying
- * device more zones that the target size, also adjust the number
- * of zones.
- */
- for (i = 0; i < nrz; i++) {
- zone = zones + i;
- if (zone->start >= start + ti->len) {
- memset(zone, 0, sizeof(struct blk_zone) * (nrz - i));
- break;
- }
-
- zone->start = zone->start + ti->begin - start;
- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
- continue;
-
- if (zone->cond == BLK_ZONE_COND_FULL)
- zone->wp = zone->start + zone->len;
- else if (zone->cond == BLK_ZONE_COND_EMPTY)
- zone->wp = zone->start;
- else
- zone->wp = zone->wp + ti->begin - start;
- }
-
- *nr_zones = i;
-#else /* !CONFIG_BLK_DEV_ZONED */
- *nr_zones = 0;
-#endif
-}
-EXPORT_SYMBOL_GPL(dm_remap_zone_report);
-
static blk_qc_t __map_bio(struct dm_target_io *tio)
{
int r;
@@ -1627,7 +1606,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
- } else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
+ } else if (op_is_zone_mgmt(bio_op(bio))) {
ci.bio = bio;
ci.sector_count = 0;
error = __split_and_process_non_flush(&ci);
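
    dm_report_zones_cb() above shifts each zone by tgt->begin - args->start so it appears at its position inside the mapped device rather than on the backing device, and advances next_sector so dm_blk_report_zones() can walk the table target by target. The remapping arithmetic with hypothetical numbers (a target that begins at sector 0 of the dm device and at sector 524288 of the backing device, 256 MiB zones):

    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical layout, all values in 512-byte sectors. */
            long long tgt_begin = 0;        /* target start inside the dm device */
            long long args_start = 524288;  /* target start on the backing device */
            long long diff = tgt_begin - args_start;

            long long zone_start = 1048576; /* second zone on the backing device */
            long long zone_len = 524288;
            long long wp = 1048576 + 1000;  /* write pointer inside that zone */

            printf("zone start in dm device: %lld\n", zone_start + diff);  /* 524288 */
            printf("remapped write pointer:  %lld\n", wp + diff);          /* 525288 */
            printf("next_sector for the loop: %lld\n", zone_start + diff + zone_len);
            return 0;
    }

    Full and empty zones get their write pointer pinned to the zone end or start instead of being shifted, as the hunk shows.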
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index b092c7b5282f..3ad18246fcb3 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2139,6 +2139,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
memcpy(page_address(store.sb_page),
page_address(bitmap->storage.sb_page),
sizeof(bitmap_super_t));
+ spin_lock_irq(&bitmap->counts.lock);
md_bitmap_file_unmap(&bitmap->storage);
bitmap->storage = store;
@@ -2154,7 +2155,6 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
blocks = min(old_counts.chunks << old_counts.chunkshift,
chunks << chunkshift);
- spin_lock_irq(&bitmap->counts.lock);
/* For cluster raid, need to pre-allocate bitmap */
if (mddev_is_clustered(bitmap->mddev)) {
unsigned long page;
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index c766c559d36d..26c75c0199fa 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -244,10 +244,9 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
sector_t start_sector, end_sector, data_offset;
sector_t bio_sector = bio->bi_iter.bi_sector;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
tmp_dev = which_dev(mddev, bio_sector);
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 6780938d2991..152f9e65a226 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -104,10 +104,9 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
struct multipath_bh * mp_bh;
struct multipath_info *multipath;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1be7abeb24fd..805b33e27496 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -550,7 +550,13 @@ static void md_submit_flush_data(struct work_struct *ws)
}
}
-void md_flush_request(struct mddev *mddev, struct bio *bio)
+/*
+ * Manages consolidation of flushes and submitting any flushes needed for
+ * a bio with REQ_PREFLUSH. Returns true if the bio is finished or is
+ * being finished in another context. Returns false if the flushing is
+ * complete but still needs the I/O portion of the bio to be processed.
+ */
+bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
ktime_t start = ktime_get_boottime();
spin_lock_irq(&mddev->lock);
@@ -575,9 +581,10 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
bio_endio(bio);
else {
bio->bi_opf &= ~REQ_PREFLUSH;
- mddev->pers->make_request(mddev, bio);
+ return false;
}
}
+ return true;
}
EXPORT_SYMBOL(md_flush_request);
@@ -1098,6 +1105,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
mdp_super_t *sb;
int ret;
+ bool spare_disk = true;
/*
 * Calculate the position of the superblock (512-byte sectors),
@@ -1148,8 +1156,18 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
else
rdev->desc_nr = sb->this_disk.number;
+ /* not spare disk, or LEVEL_MULTIPATH */
+ if (sb->level == LEVEL_MULTIPATH ||
+ (rdev->desc_nr >= 0 &&
+ sb->disks[rdev->desc_nr].state &
+ ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
+ spare_disk = false;
+
if (!refdev) {
- ret = 1;
+ if (!spare_disk)
+ ret = 1;
+ else
+ ret = 0;
} else {
__u64 ev1, ev2;
mdp_super_t *refsb = page_address(refdev->sb_page);
@@ -1165,7 +1183,8 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
}
ev1 = md_event(sb);
ev2 = md_event(refsb);
- if (ev1 > ev2)
+
+ if (!spare_disk && ev1 > ev2)
ret = 1;
else
ret = 0;
@@ -1525,6 +1544,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
sector_t sectors;
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
int bmask;
+ bool spare_disk = true;
/*
 * Calculate the position of the superblock in 512-byte sectors.
@@ -1658,8 +1678,19 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
sb->level != 0)
return -EINVAL;
+ /* not spare disk, or LEVEL_MULTIPATH */
+ if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
+ (rdev->desc_nr >= 0 &&
+ rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
+ (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
+ le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
+ spare_disk = false;
+
if (!refdev) {
- ret = 1;
+ if (!spare_disk)
+ ret = 1;
+ else
+ ret = 0;
} else {
__u64 ev1, ev2;
struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
@@ -1676,7 +1707,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
ev1 = le64_to_cpu(sb->events);
ev2 = le64_to_cpu(refsb->events);
- if (ev1 > ev2)
+ if (!spare_disk && ev1 > ev2)
ret = 1;
else
ret = 0;
@@ -3597,7 +3628,7 @@ abort_free:
* Check a full RAID array for plausibility
*/
-static void analyze_sbs(struct mddev *mddev)
+static int analyze_sbs(struct mddev *mddev)
{
int i;
struct md_rdev *rdev, *freshest, *tmp;
@@ -3618,6 +3649,12 @@ static void analyze_sbs(struct mddev *mddev)
md_kick_rdev_from_array(rdev);
}
+ /* Cannot find a valid fresh disk */
+ if (!freshest) {
+ pr_warn("md: cannot find a valid disk\n");
+ return -EINVAL;
+ }
+
super_types[mddev->major_version].
validate_super(mddev, freshest);
@@ -3652,6 +3689,8 @@ static void analyze_sbs(struct mddev *mddev)
clear_bit(In_sync, &rdev->flags);
}
}
+
+ return 0;
}
/* Read a fixed-point number.
@@ -5570,7 +5609,9 @@ int md_run(struct mddev *mddev)
if (!mddev->raid_disks) {
if (!mddev->persistent)
return -EINVAL;
- analyze_sbs(mddev);
+ err = analyze_sbs(mddev);
+ if (err)
+ return -EINVAL;
}
if (mddev->level != LEVEL_NONE)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index c5e3ff398b59..5f86f8adb0a4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -550,7 +550,7 @@ struct md_personality
int level;
struct list_head list;
struct module *owner;
- bool (*make_request)(struct mddev *mddev, struct bio *bio);
+ bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
/*
* start up works that do NOT require md_thread. tasks that
* requires md_thread should go into start()
@@ -703,7 +703,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
extern int mddev_congested(struct mddev *mddev, int bits);
-extern void md_flush_request(struct mddev *mddev, struct bio *bio);
+extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
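Annotating both the md_flush_request() prototype and the make_request hook with __must_check means a caller that silently drops the new boolean gets flagged at compile time; in the kernel the macro expands to the compiler's warn_unused_result attribute. A rough standalone sketch of what that buys (names here are made up, nothing in it is from the patch):

#include <stdbool.h>

/* Outside the kernel headers, __must_check is roughly: */
#define __must_check __attribute__((__warn_unused_result__))

bool __must_check flush_like(void);

void caller(void)
{
	flush_like();		/* warning: ignoring return value of 'flush_like' */

	if (!flush_like()) {	/* fine: the result is consumed */
		/* handle the I/O portion of the bio here */
	}
}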
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 1e772287b1c8..b7c20979bd19 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -575,10 +575,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
unsigned chunk_sects;
unsigned sectors;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
raid0_handle_discard(mddev, bio);
@@ -615,7 +614,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
tmp_dev = map_sector(mddev, zone, sector, &sector);
break;
default:
- WARN("md/raid0:%s: Invalid layout\n", mdname(mddev));
+ WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev));
bio_io_error(bio);
return true;
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 0466ee2453b4..a409ab6f30bc 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -819,6 +819,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
else
generic_make_request(bio);
bio = next;
+ cond_resched();
}
}
@@ -1567,10 +1568,9 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
{
sector_t sectors;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
/*
* There is a limit to the maximum size, but
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 299c7b1c9718..ec136e44aef7 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -191,7 +191,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
out_free_pages:
while (--j >= 0)
- resync_free_pages(&rps[j * 2]);
+ resync_free_pages(&rps[j]);
j = 0;
out_free_bio:
@@ -1525,10 +1525,9 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
int chunk_sects = chunk_mask + 1;
int sectors = bio_sectors(bio);
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
if (!md_write_start(mddev, bio))
return false;
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 18a4064a61a8..cab5b1352892 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1404,7 +1404,7 @@ int ppl_init_log(struct r5conf *conf)
atomic64_set(&ppl_conf->seq, 0);
INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
spin_lock_init(&ppl_conf->no_mem_stripes_lock);
- ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET;
+ ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET;
if (!mddev->external) {
ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
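The write-hint changes here and in raid5.c below are spelling corrections rather than behaviour changes. As I understand the uapi fcntl header, RWH_WRITE_LIFE_NOT_SET is the canonical "no lifetime hint" value used for write_hint fields, and RWF_WRITE_LIFE_NOT_SET is only a backward-compatibility alias for it, so both evaluate to the same value:

/* Recollection of include/uapi/linux/fcntl.h, for illustration only: */
#define RWH_WRITE_LIFE_NOT_SET	0	/* no write-lifetime hint set */
/* ... RWH_WRITE_LIFE_NONE/SHORT/MEDIUM/LONG/EXTREME follow ... */
#define RWF_WRITE_LIFE_NOT_SET	RWH_WRITE_LIFE_NOT_SET	/* legacy alias */

Switching to the RWH_* spelling just uses the intended name; the stored value is unchanged.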
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 223e97ab27e6..f0fc538bfe59 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1134,7 +1134,7 @@ again:
bi->bi_iter.bi_size = STRIPE_SIZE;
bi->bi_write_hint = sh->dev[i].write_hint;
if (!rrdev)
- sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
+ sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
@@ -1187,7 +1187,7 @@ again:
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_iter.bi_size = STRIPE_SIZE;
rbi->bi_write_hint = sh->dev[i].write_hint;
- sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
+ sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
@@ -5592,8 +5592,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
if (ret == 0)
return true;
if (ret == -ENODEV) {
- md_flush_request(mddev, bi);
- return true;
+ if (md_flush_request(mddev, bi))
+ return true;
}
/* ret == -EAGAIN, fallback */
/*