diff options
author | 2025-08-22 09:29:51 -0400 | |
---|---|---|
committer | 2025-08-22 09:29:51 -0400 | |
commit | a2e94e80790bb103ca72f8a2991f43c80474a4b6 (patch) | |
tree | 5f7e444890f5d36d2d6df904331386c5f9c7c8ad | |
parent | Merge tag 'io_uring-6.17-20250822' of git://git.kernel.dk/linux (diff) | |
parent | block: avoid cpu_hotplug_lock depedency on freeze_lock (diff) | |
download | wireguard-linux-a2e94e80790bb103ca72f8a2991f43c80474a4b6.tar.xz wireguard-linux-a2e94e80790bb103ca72f8a2991f43c80474a4b6.zip |
Merge tag 'block-6.17-20250822' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:
"A set of fixes for block that should go into this tree. A bit larger
than what I usually have at this point in time, a lot of that is the
continued fixing of the lockdep annotation for queue freezing that we
recently added, which has highlighted a number of little issues here
and there. This contains:
- MD pull request via Yu:
- Add a legacy_async_del_gendisk mode, to prevent a user tools
regression. New user tools releases will not use such a mode,
the old release with a new kernel now will have warning about
deprecated behavior, and we prepare to remove this legacy mode
after about a year later
- The rename in kernel causing user tools build failure, revert
the rename in mdp_superblock_s
- Fix a regression that interrupted resync can be shown as
recover from mdstat or sysfs
- Improve file size detection for loop, particularly for networked
file systems, by using getattr to get the size rather than the
cached inode size.
- Hotplug CPU lock vs queue freeze fix
- Lockdep fix while updating the number of hardware queues
- Fix stacking for PI devices
- Silence bio_check_eod() for the known case of device removal where
the size is truncated to 0 sectors"
* tag 'block-6.17-20250822' of git://git.kernel.dk/linux:
block: avoid cpu_hotplug_lock depedency on freeze_lock
block: decrement block_rq_qos static key in rq_qos_del()
block: skip q->rq_qos check in rq_qos_done_bio()
blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
block: tone down bio_check_eod
loop: use vfs_getattr_nosec for accurate file size
loop: Consolidate size calculation logic into lo_calculate_size()
block: remove newlines from the warnings in blk_validate_integrity_limits
block: handle pi_tuple_size in queue_limits_stack_integrity
selftests: ublk: Use ARRAY_SIZE() macro to improve code
md: fix sync_action incorrect display during resync
md: add helper rdev_needs_recovery()
md: keep recovery_cp in mdp_superblock_s
md: add legacy_async_del_gendisk mode
-rw-r--r-- | block/blk-core.c | 2 | ||||
-rw-r--r-- | block/blk-mq-debugfs.c | 1 | ||||
-rw-r--r-- | block/blk-mq.c | 13 | ||||
-rw-r--r-- | block/blk-rq-qos.c | 8 | ||||
-rw-r--r-- | block/blk-rq-qos.h | 48 | ||||
-rw-r--r-- | block/blk-settings.c | 12 | ||||
-rw-r--r-- | drivers/block/loop.c | 39 | ||||
-rw-r--r-- | drivers/md/md.c | 122 | ||||
-rw-r--r-- | include/linux/blkdev.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/raid/md_p.h | 2 | ||||
-rw-r--r-- | tools/testing/selftests/ublk/kublk.c | 4 |
11 files changed, 169 insertions, 83 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index fdac48aec5ef..4201504158a1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -557,7 +557,7 @@ static inline int bio_check_eod(struct bio *bio) sector_t maxsector = bdev_nr_sectors(bio->bi_bdev); unsigned int nr_sectors = bio_sectors(bio); - if (nr_sectors && + if (nr_sectors && maxsector && (nr_sectors > maxsector || bio->bi_iter.bi_sector > maxsector - nr_sectors)) { pr_info_ratelimited("%s: attempt to access beyond end of device\n" diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7ed3e71f2fc0..32c65efdda46 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -95,6 +95,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SQ_SCHED), QUEUE_FLAG_NAME(DISABLE_WBT_DEF), QUEUE_FLAG_NAME(NO_ELV_SWITCH), + QUEUE_FLAG_NAME(QOS_ENABLED), }; #undef QUEUE_FLAG_NAME diff --git a/block/blk-mq.c b/block/blk-mq.c index b67d6c02eceb..ba3a4b77f578 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -5033,6 +5033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, unsigned int memflags; int i; struct xarray elv_tbl, et_tbl; + bool queues_frozen = false; lockdep_assert_held(&set->tag_list_lock); @@ -5056,9 +5057,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_sysfs_unregister_hctxs(q); } - list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_freeze_queue_nomemsave(q); - /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done @@ -5068,6 +5066,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (blk_mq_elv_switch_none(q, &elv_tbl)) goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_freeze_queue_nomemsave(q); + queues_frozen = true; if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) goto switch_back; @@ -5091,8 +5092,12 @@ fallback: } switch_back: /* The blk_mq_elv_switch_back unfreezes queue for us. */ - list_for_each_entry(q, &set->tag_list, tag_set_list) + list_for_each_entry(q, &set->tag_list, tag_set_list) { + /* switch_back expects queue to be frozen */ + if (!queues_frozen) + blk_mq_freeze_queue_nomemsave(q); blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl); + } list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register_hctxs(q); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 848591fb3c57..654478dfbc20 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -2,8 +2,6 @@ #include "blk-rq-qos.h" -__read_mostly DEFINE_STATIC_KEY_FALSE(block_rq_qos); - /* * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, * false if 'v' + 1 would be bigger than 'below'. @@ -319,8 +317,8 @@ void rq_qos_exit(struct request_queue *q) struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); - static_branch_dec(&block_rq_qos); } + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); mutex_unlock(&q->rq_qos_mutex); } @@ -346,7 +344,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; - static_branch_inc(&block_rq_qos); + blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); @@ -377,6 +375,8 @@ void rq_qos_del(struct rq_qos *rqos) break; } } + if (!q->rq_qos) + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 39749f4066fb..1fe22000a379 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -12,7 +12,6 @@ #include "blk-mq-debugfs.h" struct blk_mq_debugfs_attr; -extern struct static_key_false block_rq_qos; enum rq_qos_id { RQ_QOS_WBT, @@ -113,43 +112,55 @@ void __rq_qos_queue_depth_changed(struct rq_qos *rqos); static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_cleanup(q->rq_qos, bio); } static inline void rq_qos_done(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos && - !blk_rq_is_passthrough(rq)) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos && !blk_rq_is_passthrough(rq)) __rq_qos_done(q->rq_qos, rq); } static inline void rq_qos_issue(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_issue(q->rq_qos, rq); } static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_requeue(q->rq_qos, rq); } static inline void rq_qos_done_bio(struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && - bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || - bio_flagged(bio, BIO_QOS_MERGED))) { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - if (q->rq_qos) - __rq_qos_done_bio(q->rq_qos, bio); - } + struct request_queue *q; + + if (!bio->bi_bdev || (!bio_flagged(bio, BIO_QOS_THROTTLED) && + !bio_flagged(bio, BIO_QOS_MERGED))) + return; + + q = bdev_get_queue(bio->bi_bdev); + + /* + * If a bio has BIO_QOS_xxx set, it implicitly implies that + * q->rq_qos is present. So, we skip re-checking q->rq_qos + * here as an extra optimization and directly call + * __rq_qos_done_bio(). + */ + __rq_qos_done_bio(q->rq_qos, bio); } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) { bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); } @@ -158,14 +169,16 @@ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) static inline void rq_qos_track(struct request_queue *q, struct request *rq, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_track(q->rq_qos, rq, bio); } static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) { bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); } @@ -173,7 +186,8 @@ static inline void rq_qos_merge(struct request_queue *q, struct request *rq, static inline void rq_qos_queue_depth_changed(struct request_queue *q) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_queue_depth_changed(q->rq_qos); } diff --git a/block/blk-settings.c b/block/blk-settings.c index 07874e9b609f..d6438e6c276d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -157,16 +157,14 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) switch (bi->csum_type) { case BLK_INTEGRITY_CSUM_NONE: if (bi->pi_tuple_size) { - pr_warn("pi_tuple_size must be 0 when checksum type \ - is none\n"); + pr_warn("pi_tuple_size must be 0 when checksum type is none\n"); return -EINVAL; } break; case BLK_INTEGRITY_CSUM_CRC: case BLK_INTEGRITY_CSUM_IP: if (bi->pi_tuple_size != sizeof(struct t10_pi_tuple)) { - pr_warn("pi_tuple_size mismatch for T10 PI: expected \ - %zu, got %u\n", + pr_warn("pi_tuple_size mismatch for T10 PI: expected %zu, got %u\n", sizeof(struct t10_pi_tuple), bi->pi_tuple_size); return -EINVAL; @@ -174,8 +172,7 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) break; case BLK_INTEGRITY_CSUM_CRC64: if (bi->pi_tuple_size != sizeof(struct crc64_pi_tuple)) { - pr_warn("pi_tuple_size mismatch for CRC64 PI: \ - expected %zu, got %u\n", + pr_warn("pi_tuple_size mismatch for CRC64 PI: expected %zu, got %u\n", sizeof(struct crc64_pi_tuple), bi->pi_tuple_size); return -EINVAL; @@ -972,6 +969,8 @@ bool queue_limits_stack_integrity(struct queue_limits *t, goto incompatible; if (ti->csum_type != bi->csum_type) goto incompatible; + if (ti->pi_tuple_size != bi->pi_tuple_size) + goto incompatible; if ((ti->flags & BLK_INTEGRITY_REF_TAG) != (bi->flags & BLK_INTEGRITY_REF_TAG)) goto incompatible; @@ -980,6 +979,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t, ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) | (bi->flags & BLK_INTEGRITY_REF_TAG); ti->csum_type = bi->csum_type; + ti->pi_tuple_size = bi->pi_tuple_size; ti->metadata_size = bi->metadata_size; ti->pi_offset = bi->pi_offset; ti->interval_exp = bi->interval_exp; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1b6ee91f8eb9..57263c273f0f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -137,20 +137,29 @@ static void loop_global_unlock(struct loop_device *lo, bool global) static int max_part; static int part_shift; -static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) +static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { + struct kstat stat; loff_t loopsize; + int ret; - /* Compute loopsize in bytes */ - loopsize = i_size_read(file->f_mapping->host); - if (offset > 0) - loopsize -= offset; + /* + * Get the accurate file size. This provides better results than + * cached inode data, particularly for network filesystems where + * metadata may be stale. + */ + ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); + if (ret) + return 0; + + loopsize = stat.size; + if (lo->lo_offset > 0) + loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; - - if (sizelimit > 0 && sizelimit < loopsize) - loopsize = sizelimit; + if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) + loopsize = lo->lo_sizelimit; /* * Unfortunately, if we want to do I/O on the device, * the number of 512-byte sectors has to fit into a sector_t. @@ -158,11 +167,6 @@ static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) return loopsize >> 9; } -static loff_t get_loop_size(struct loop_device *lo, struct file *file) -{ - return get_size(lo->lo_offset, lo->lo_sizelimit, file); -} - /* * We support direct I/O only if lo_offset is aligned with the logical I/O size * of backing device, and the logical block size of loop is bigger than that of @@ -569,7 +573,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, error = -EINVAL; /* size of the new backing store needs to be the same */ - if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) + if (lo_calculate_size(lo, file) != lo_calculate_size(lo, old_file)) goto out_err; /* @@ -1063,7 +1067,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, loop_update_dio(lo); loop_sysfs_init(lo); - size = get_loop_size(lo, file); + size = lo_calculate_size(lo, file); loop_set_size(lo, size); /* Order wrt reading lo_state in loop_validate_file(). */ @@ -1255,8 +1259,7 @@ out_unfreeze: if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); if (!err && size_changed) { - loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); + loff_t new_size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, new_size); } out_unlock: @@ -1399,7 +1402,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - size = get_loop_size(lo, lo->lo_backing_file); + size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, size); return 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index ac85ec73a409..1baaf52c603c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -339,6 +339,7 @@ static int start_readonly; * so all the races disappear. */ static bool create_on_open = true; +static bool legacy_async_del_gendisk = true; /* * We have a system wide 'event count' that is incremented @@ -877,15 +878,18 @@ void mddev_unlock(struct mddev *mddev) export_rdev(rdev, mddev); } - /* Call del_gendisk after release reconfig_mutex to avoid - * deadlock (e.g. call del_gendisk under the lock and an - * access to sysfs files waits the lock) - * And MD_DELETED is only used for md raid which is set in - * do_md_stop. dm raid only uses md_stop to stop. So dm raid - * doesn't need to check MD_DELETED when getting reconfig lock - */ - if (test_bit(MD_DELETED, &mddev->flags)) - del_gendisk(mddev->gendisk); + if (!legacy_async_del_gendisk) { + /* + * Call del_gendisk after release reconfig_mutex to avoid + * deadlock (e.g. call del_gendisk under the lock and an + * access to sysfs files waits the lock) + * And MD_DELETED is only used for md raid which is set in + * do_md_stop. dm raid only uses md_stop to stop. So dm raid + * doesn't need to check MD_DELETED when getting reconfig lock + */ + if (test_bit(MD_DELETED, &mddev->flags)) + del_gendisk(mddev->gendisk); + } } EXPORT_SYMBOL_GPL(mddev_unlock); @@ -1419,7 +1423,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru else { if (sb->events_hi == sb->cp_events_hi && sb->events_lo == sb->cp_events_lo) { - mddev->resync_offset = sb->resync_offset; + mddev->resync_offset = sb->recovery_cp; } else mddev->resync_offset = 0; } @@ -1547,13 +1551,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) mddev->minor_version = sb->minor_version; if (mddev->in_sync) { - sb->resync_offset = mddev->resync_offset; + sb->recovery_cp = mddev->resync_offset; sb->cp_events_hi = (mddev->events>>32); sb->cp_events_lo = (u32)mddev->events; if (mddev->resync_offset == MaxSector) sb->state = (1<< MD_SB_CLEAN); } else - sb->resync_offset = 0; + sb->recovery_cp = 0; sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_sectors << 9; @@ -4835,9 +4839,42 @@ out_unlock: static struct md_sysfs_entry md_metadata = __ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); +static bool rdev_needs_recovery(struct md_rdev *rdev, sector_t sectors) +{ + return rdev->raid_disk >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(Faulty, &rdev->flags) && + !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < sectors; +} + +static enum sync_action md_get_active_sync_action(struct mddev *mddev) +{ + struct md_rdev *rdev; + bool is_recover = false; + + if (mddev->resync_offset < MaxSector) + return ACTION_RESYNC; + + if (mddev->reshape_position != MaxSector) + return ACTION_RESHAPE; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (rdev_needs_recovery(rdev, MaxSector)) { + is_recover = true; + break; + } + } + rcu_read_unlock(); + + return is_recover ? ACTION_RECOVER : ACTION_IDLE; +} + enum sync_action md_sync_action(struct mddev *mddev) { unsigned long recovery = mddev->recovery; + enum sync_action active_action; /* * frozen has the highest priority, means running sync_thread will be @@ -4861,8 +4898,17 @@ enum sync_action md_sync_action(struct mddev *mddev) !test_bit(MD_RECOVERY_NEEDED, &recovery)) return ACTION_IDLE; - if (test_bit(MD_RECOVERY_RESHAPE, &recovery) || - mddev->reshape_position != MaxSector) + /* + * Check if any sync operation (resync/recover/reshape) is + * currently active. This ensures that only one sync operation + * can run at a time. Returns the type of active operation, or + * ACTION_IDLE if none are active. + */ + active_action = md_get_active_sync_action(mddev); + if (active_action != ACTION_IDLE) + return active_action; + + if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) return ACTION_RESHAPE; if (test_bit(MD_RECOVERY_RECOVER, &recovery)) @@ -5818,6 +5864,13 @@ static void md_kobj_release(struct kobject *ko) { struct mddev *mddev = container_of(ko, struct mddev, kobj); + if (legacy_async_del_gendisk) { + if (mddev->sysfs_state) + sysfs_put(mddev->sysfs_state); + if (mddev->sysfs_level) + sysfs_put(mddev->sysfs_level); + del_gendisk(mddev->gendisk); + } put_disk(mddev->gendisk); } @@ -6021,6 +6074,9 @@ static int md_alloc_and_put(dev_t dev, char *name) { struct mddev *mddev = md_alloc(dev, name); + if (legacy_async_del_gendisk) + pr_warn("md: async del_gendisk mode will be removed in future, please upgrade to mdadm-4.5+\n"); + if (IS_ERR(mddev)) return PTR_ERR(mddev); mddev_put(mddev); @@ -6431,10 +6487,22 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - /* if UNTIL_STOP is set, it's cleared here */ - mddev->hold_active = 0; - /* Don't clear MD_CLOSING, or mddev can be opened again. */ - mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + + /* + * For legacy_async_del_gendisk mode, it can stop the array in the + * middle of assembling it, then it still can access the array. So + * it needs to clear MD_CLOSING. If not legacy_async_del_gendisk, + * it can't open the array again after stopping it. So it doesn't + * clear MD_CLOSING. + */ + if (legacy_async_del_gendisk && mddev->hold_active) { + clear_bit(MD_CLOSING, &mddev->flags); + } else { + /* if UNTIL_STOP is set, it's cleared here */ + mddev->hold_active = 0; + /* Don't clear MD_CLOSING, or mddev can be opened again. */ + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + } mddev->sb_flags = 0; mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; @@ -6658,7 +6726,8 @@ static int do_md_stop(struct mddev *mddev, int mode) export_array(mddev); md_clean(mddev); - set_bit(MD_DELETED, &mddev->flags); + if (!legacy_async_del_gendisk) + set_bit(MD_DELETED, &mddev->flags); } md_new_event(); sysfs_notify_dirent_safe(mddev->sysfs_state); @@ -8968,11 +9037,7 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) start = MaxSector; rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < start) + if (rdev_needs_recovery(rdev, start)) start = rdev->recovery_offset; rcu_read_unlock(); @@ -9331,12 +9396,8 @@ void md_do_sync(struct md_thread *thread) test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) { rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < mddev->curr_resync) + if (mddev->delta_disks >= 0 && + rdev_needs_recovery(rdev, mddev->curr_resync)) rdev->recovery_offset = mddev->curr_resync; rcu_read_unlock(); } @@ -10392,6 +10453,7 @@ module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); module_param(create_on_open, bool, S_IRUSR|S_IWUSR); +module_param(legacy_async_del_gendisk, bool, 0600); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD RAID framework"); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fe1797bbec42 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,6 +656,7 @@ enum { QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ + QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ QUEUE_FLAG_MAX }; diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index b13946287277..ac74133a4768 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -173,7 +173,7 @@ typedef struct mdp_superblock_s { #else #error unspecified endianness #endif - __u32 resync_offset; /* 11 resync checkpoint sector count */ + __u32 recovery_cp; /* 11 resync checkpoint sector count */ /* There are only valid for minor_version > 90 */ __u64 reshape_position; /* 12,13 next address in array-space for reshape */ __u32 new_level; /* 14 new level we are reshaping to */ diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 95188065b2e9..6512dfbdbce3 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1400,7 +1400,7 @@ static int cmd_dev_get_features(void) if (!((1ULL << i) & features)) continue; - if (i < sizeof(feat_map) / sizeof(feat_map[0])) + if (i < ARRAY_SIZE(feat_map)) feat = feat_map[i]; else feat = "unknown"; @@ -1477,7 +1477,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); printf("\tdefault: nthreads=nr_queues"); - for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) { const struct ublk_tgt_ops *ops = tgt_ops_list[i]; if (ops->usage) |