aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c86
1 files changed, 38 insertions, 48 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index aea11476fee6..8a1354a08a1a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -64,31 +64,6 @@
* [B A] [D C] [B A] [E C D]
*/
-/*
- * Number of guaranteed r10bios in case of extreme VM load:
- */
-#define NR_RAID10_BIOS 256
-
-/* when we get a read error on a read-only array, we redirect to another
- * device without failing the first device, or trying to over-write to
- * correct the read error. To keep track of bad blocks on a per-bio
- * level, we store IO_BLOCKED in the appropriate 'bios' pointer
- */
-#define IO_BLOCKED ((struct bio *)1)
-/* When we successfully write to a known bad-block, we need to remove the
- * bad-block marking which must be done from process context. So we record
- * the success by setting devs[n].bio to IO_MADE_GOOD
- */
-#define IO_MADE_GOOD ((struct bio *)2)
-
-#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
-
-/* When there are this many requests queued to be written by
- * the raid10 thread, we become 'congested' to provide back-pressure
- * for writeback.
- */
-static int max_queued_requests = 1024;
-
static void allow_barrier(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf);
static int _enough(struct r10conf *conf, int previous, int ignore);
@@ -123,11 +98,6 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
return kzalloc(size, gfp_flags);
}
-static void r10bio_pool_free(void *r10_bio, void *data)
-{
- kfree(r10_bio);
-}
-
#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
/* amount of memory to reserve for resync requests */
#define RESYNC_WINDOW (1024*1024)
@@ -233,7 +203,7 @@ out_free_bio:
}
kfree(rps);
out_free_r10bio:
- r10bio_pool_free(r10_bio, conf);
+ rbio_pool_free(r10_bio, conf);
return NULL;
}
@@ -261,7 +231,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
/* resync pages array stored in the 1st bio's .bi_private */
kfree(rp);
- r10bio_pool_free(r10bio, conf);
+ rbio_pool_free(r10bio, conf);
}
static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
@@ -737,15 +707,19 @@ static struct md_rdev *read_balance(struct r10conf *conf,
int sectors = r10_bio->sectors;
int best_good_sectors;
sector_t new_distance, best_dist;
- struct md_rdev *best_rdev, *rdev = NULL;
+ struct md_rdev *best_dist_rdev, *best_pending_rdev, *rdev = NULL;
int do_balance;
- int best_slot;
+ int best_dist_slot, best_pending_slot;
+ bool has_nonrot_disk = false;
+ unsigned int min_pending;
struct geom *geo = &conf->geo;
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
- best_slot = -1;
- best_rdev = NULL;
+ best_dist_slot = -1;
+ min_pending = UINT_MAX;
+ best_dist_rdev = NULL;
+ best_pending_rdev = NULL;
best_dist = MaxSector;
best_good_sectors = 0;
do_balance = 1;
@@ -767,6 +741,8 @@ static struct md_rdev *read_balance(struct r10conf *conf,
sector_t first_bad;
int bad_sectors;
sector_t dev_sector;
+ unsigned int pending;
+ bool nonrot;
if (r10_bio->devs[slot].bio == IO_BLOCKED)
continue;
@@ -803,8 +779,8 @@ static struct md_rdev *read_balance(struct r10conf *conf,
first_bad - dev_sector;
if (good_sectors > best_good_sectors) {
best_good_sectors = good_sectors;
- best_slot = slot;
- best_rdev = rdev;
+ best_dist_slot = slot;
+ best_dist_rdev = rdev;
}
if (!do_balance)
/* Must read from here */
@@ -817,14 +793,23 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (!do_balance)
break;
- if (best_slot >= 0)
+ nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+ has_nonrot_disk |= nonrot;
+ pending = atomic_read(&rdev->nr_pending);
+ if (min_pending > pending && nonrot) {
+ min_pending = pending;
+ best_pending_slot = slot;
+ best_pending_rdev = rdev;
+ }
+
+ if (best_dist_slot >= 0)
/* At least 2 disks to choose from so failfast is OK */
set_bit(R10BIO_FailFast, &r10_bio->state);
/* This optimisation is debatable, and completely destroys
* sequential read speed for 'far copies' arrays. So only
* keep it for 'near' arrays, and review those later.
*/
- if (geo->near_copies > 1 && !atomic_read(&rdev->nr_pending))
+ if (geo->near_copies > 1 && !pending)
new_distance = 0;
/* for far > 1 always use the lowest address */
@@ -833,15 +818,21 @@ static struct md_rdev *read_balance(struct r10conf *conf,
else
new_distance = abs(r10_bio->devs[slot].addr -
conf->mirrors[disk].head_position);
+
if (new_distance < best_dist) {
best_dist = new_distance;
- best_slot = slot;
- best_rdev = rdev;
+ best_dist_slot = slot;
+ best_dist_rdev = rdev;
}
}
if (slot >= conf->copies) {
- slot = best_slot;
- rdev = best_rdev;
+ if (has_nonrot_disk) {
+ slot = best_pending_slot;
+ rdev = best_pending_rdev;
+ } else {
+ slot = best_dist_slot;
+ rdev = best_dist_rdev;
+ }
}
if (slot >= 0) {
@@ -3675,8 +3666,8 @@ static struct r10conf *setup_conf(struct mddev *mddev)
conf->geo = geo;
conf->copies = copies;
- err = mempool_init(&conf->r10bio_pool, NR_RAID10_BIOS, r10bio_pool_alloc,
- r10bio_pool_free, conf);
+ err = mempool_init(&conf->r10bio_pool, NR_RAID_BIOS, r10bio_pool_alloc,
+ rbio_pool_free, conf);
if (err)
goto out;
@@ -4780,8 +4771,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
int idx = 0;
struct page **pages;
- r10b = kmalloc(sizeof(*r10b) +
- sizeof(struct r10dev) * conf->copies, GFP_NOIO);
+ r10b = kmalloc(struct_size(r10b, devs, conf->copies), GFP_NOIO);
if (!r10b) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
return -ENOMEM;