about summary refs log tree commit diff stats
path: root/block/bio.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/bio.c')
-rw-r--r-- block/bio.c 231
1 files changed, 182 insertions, 49 deletions
diff --git a/block/bio.c b/block/bio.c
index 1fab762e079b..e16849f46b0e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -25,6 +25,11 @@
#include "blk.h"
#include "blk-rq-qos.h"
+struct bio_alloc_cache {
+ struct bio_list free_list;
+ unsigned int nr;
+};
+
static struct biovec_slab {
int nr_vecs;
char *name;
@@ -246,12 +251,40 @@ static void bio_free(struct bio *bio)
void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs)
{
- memset(bio, 0, sizeof(*bio));
+ bio->bi_next = NULL;
+ bio->bi_bdev = NULL;
+ bio->bi_opf = 0;
+ bio->bi_flags = 0;
+ bio->bi_ioprio = 0;
+ bio->bi_write_hint = 0;
+ bio->bi_status = 0;
+ bio->bi_iter.bi_sector = 0;
+ bio->bi_iter.bi_size = 0;
+ bio->bi_iter.bi_idx = 0;
+ bio->bi_iter.bi_bvec_done = 0;
+ bio->bi_end_io = NULL;
+ bio->bi_private = NULL;
+#ifdef CONFIG_BLK_CGROUP
+ bio->bi_blkg = NULL;
+ bio->bi_issue.value = 0;
+#ifdef CONFIG_BLK_CGROUP_IOCOST
+ bio->bi_iocost_cost = 0;
+#endif
+#endif
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ bio->bi_crypt_context = NULL;
+#endif
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ bio->bi_integrity = NULL;
+#endif
+ bio->bi_vcnt = 0;
+
atomic_set(&bio->__bi_remaining, 1);
atomic_set(&bio->__bi_cnt, 1);
- bio->bi_io_vec = table;
bio->bi_max_vecs = max_vecs;
+ bio->bi_io_vec = table;
+ bio->bi_pool = NULL;
}
EXPORT_SYMBOL(bio_init);
@@ -495,16 +528,11 @@ EXPORT_SYMBOL(bio_kmalloc);
void zero_fill_bio(struct bio *bio)
{
- unsigned long flags;
struct bio_vec bv;
struct bvec_iter iter;
- bio_for_each_segment(bv, bio, iter) {
- char *data = bvec_kmap_irq(&bv, &flags);
- memset(data, 0, bv.bv_len);
- flush_dcache_page(bv.bv_page);
- bvec_kunmap_irq(data, &flags);
- }
+ bio_for_each_segment(bv, bio, iter)
+ memzero_bvec(&bv);
}
EXPORT_SYMBOL(zero_fill_bio);
@@ -591,6 +619,53 @@ void guard_bio_eod(struct bio *bio)
bio_truncate(bio, maxsector << 9);
}
+#define ALLOC_CACHE_MAX 512
+#define ALLOC_CACHE_SLACK 64
+
+static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
+ unsigned int nr)
+{
+ unsigned int i = 0;
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(&cache->free_list)) != NULL) {
+ cache->nr--;
+ bio_free(bio);
+ if (++i == nr)
+ break;
+ }
+}
+
+static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+ struct bio_set *bs;
+
+ bs = hlist_entry_safe(node, struct bio_set, cpuhp_dead);
+ if (bs->cache) {
+ struct bio_alloc_cache *cache = per_cpu_ptr(bs->cache, cpu);
+
+ bio_alloc_cache_prune(cache, -1U);
+ }
+ return 0;
+}
+
+static void bio_alloc_cache_destroy(struct bio_set *bs)
+{
+ int cpu;
+
+ if (!bs->cache)
+ return;
+
+ cpuhp_state_remove_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
+ for_each_possible_cpu(cpu) {
+ struct bio_alloc_cache *cache;
+
+ cache = per_cpu_ptr(bs->cache, cpu);
+ bio_alloc_cache_prune(cache, -1U);
+ }
+ free_percpu(bs->cache);
+}
+
/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
@@ -601,16 +676,23 @@ void guard_bio_eod(struct bio *bio)
**/
void bio_put(struct bio *bio)
{
- if (!bio_flagged(bio, BIO_REFFED))
- bio_free(bio);
- else {
+ if (unlikely(bio_flagged(bio, BIO_REFFED))) {
BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
+ if (!atomic_dec_and_test(&bio->__bi_cnt))
+ return;
+ }
- /*
- * last put frees it
- */
- if (atomic_dec_and_test(&bio->__bi_cnt))
- bio_free(bio);
+ if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
+ struct bio_alloc_cache *cache;
+
+ bio_uninit(bio);
+ cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
+ bio_list_add_head(&cache->free_list, bio);
+ if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
+ bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
+ put_cpu();
+ } else {
+ bio_free(bio);
}
}
EXPORT_SYMBOL(bio_put);
@@ -979,6 +1061,14 @@ static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
return 0;
}
+static void bio_put_pages(struct page **pages, size_t size, size_t off)
+{
+ size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
+
+ for (i = 0; i < nr; i++)
+ put_page(pages[i]);
+}
+
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -1023,8 +1113,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
if (same_page)
put_page(page);
} else {
- if (WARN_ON_ONCE(bio_full(bio, len)))
- return -EINVAL;
+ if (WARN_ON_ONCE(bio_full(bio, len))) {
+ bio_put_pages(pages + i, left, offset);
+ return -EINVAL;
+ }
__bio_add_page(bio, page, len, offset);
}
offset = 0;
@@ -1069,6 +1161,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
len = min_t(size_t, PAGE_SIZE - offset, left);
if (bio_add_hw_page(q, bio, page, len, offset,
max_append_sectors, &same_page) != len) {
+ bio_put_pages(pages + i, left, offset);
ret = -EINVAL;
break;
}
@@ -1191,27 +1284,15 @@ EXPORT_SYMBOL(bio_advance);
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter)
{
- struct bio_vec src_bv, dst_bv;
- void *src_p, *dst_p;
- unsigned bytes;
-
while (src_iter->bi_size && dst_iter->bi_size) {
- src_bv = bio_iter_iovec(src, *src_iter);
- dst_bv = bio_iter_iovec(dst, *dst_iter);
-
- bytes = min(src_bv.bv_len, dst_bv.bv_len);
-
- src_p = kmap_atomic(src_bv.bv_page);
- dst_p = kmap_atomic(dst_bv.bv_page);
-
- memcpy(dst_p + dst_bv.bv_offset,
- src_p + src_bv.bv_offset,
- bytes);
+ struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
+ struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
+ unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
+ void *src_buf;
- kunmap_atomic(dst_p);
- kunmap_atomic(src_p);
-
- flush_dcache_page(dst_bv.bv_page);
+ src_buf = bvec_kmap_local(&src_bv);
+ memcpy_to_bvec(&dst_bv, src_buf);
+ kunmap_local(src_buf);
bio_advance_iter_single(src, src_iter, bytes);
bio_advance_iter_single(dst, dst_iter, bytes);
@@ -1463,12 +1544,15 @@ EXPORT_SYMBOL(bio_split);
* @bio: bio to trim
* @offset: number of sectors to trim from the front of @bio
* @size: size we want to trim @bio to, in sectors
+ *
+ * This function is typically used for bios that are cloned and submitted
+ * to the underlying device in parts.
*/
-void bio_trim(struct bio *bio, int offset, int size)
+void bio_trim(struct bio *bio, sector_t offset, sector_t size)
{
- /* 'bio' is a cloned bio which we need to trim to match
- * the given offset and size.
- */
+ if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
+ offset + size > bio->bi_iter.bi_size))
+ return;
size <<= 9;
if (offset == 0 && size == bio->bi_iter.bi_size)
@@ -1479,7 +1563,6 @@ void bio_trim(struct bio *bio, int offset, int size)
if (bio_integrity(bio))
bio_integrity_trim(bio);
-
}
EXPORT_SYMBOL_GPL(bio_trim);
@@ -1502,6 +1585,7 @@ int biovec_init_pool(mempool_t *pool, int pool_entries)
*/
void bioset_exit(struct bio_set *bs)
{
+ bio_alloc_cache_destroy(bs);
if (bs->rescue_workqueue)
destroy_workqueue(bs->rescue_workqueue);
bs->rescue_workqueue = NULL;
@@ -1563,12 +1647,18 @@ int bioset_init(struct bio_set *bs,
biovec_init_pool(&bs->bvec_pool, pool_size))
goto bad;
- if (!(flags & BIOSET_NEED_RESCUER))
- return 0;
-
- bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
- if (!bs->rescue_workqueue)
- goto bad;
+ if (flags & BIOSET_NEED_RESCUER) {
+ bs->rescue_workqueue = alloc_workqueue("bioset",
+ WQ_MEM_RECLAIM, 0);
+ if (!bs->rescue_workqueue)
+ goto bad;
+ }
+ if (flags & BIOSET_PERCPU_CACHE) {
+ bs->cache = alloc_percpu(struct bio_alloc_cache);
+ if (!bs->cache)
+ goto bad;
+ cpuhp_state_add_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
+ }
return 0;
bad:
@@ -1595,6 +1685,46 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
}
EXPORT_SYMBOL(bioset_init_from_src);
+/**
+ * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
+ * @kiocb: kiocb describing the IO
+ * @nr_vecs: number of iovecs to pre-allocate
+ * @bs: bio_set to allocate from
+ *
+ * Description:
+ * Like bio_alloc_bioset(), but pass in the kiocb. The kiocb is only
+ * used to check if we should dip into the per-cpu bio_set allocation
+ * cache. The allocation uses GFP_KERNEL internally. If the cache is
+ * used, the bio is marked BIO_PERCPU_CACHE, and the final put of the
+ * bio MUST be done from process context, not hard/soft IRQ.
+ *
+ */
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
+ struct bio_set *bs)
+{
+ struct bio_alloc_cache *cache;
+ struct bio *bio;
+
+ if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
+ return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
+
+ cache = per_cpu_ptr(bs->cache, get_cpu());
+ bio = bio_list_pop(&cache->free_list);
+ if (bio) {
+ cache->nr--;
+ put_cpu();
+ bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
+ bio->bi_pool = bs;
+ bio_set_flag(bio, BIO_PERCPU_CACHE);
+ return bio;
+ }
+ put_cpu();
+ bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
+ bio_set_flag(bio, BIO_PERCPU_CACHE);
+ return bio;
+}
+EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
+
static int __init init_bio(void)
{
int i;
@@ -1609,6 +1739,9 @@ static int __init init_bio(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
}
+ cpuhp_setup_state_multi(CPUHP_BIO_DEAD, "block/bio:dead", NULL,
+ bio_cpu_dead);
+
if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
panic("bio: can't allocate bios\n");