Diffstat (limited to 'block/bio.c')
-rw-r--r--   block/bio.c   111
1 file changed, 87 insertions, 24 deletions
diff --git a/block/bio.c b/block/bio.c
index 4db1008309ed..71a78d9fb8b7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -753,6 +753,8 @@ EXPORT_SYMBOL(bio_add_pc_page);
* @page: page to add
* @len: length of the data to add
* @off: offset of the data in @page
+ * @same_page: if %true only merge if the new data is in the same physical
+ * page as the last segment of the bio.
*
* Try to add the data at @page + @off to the last bvec of @bio. This is a
* useful optimisation for file systems with a block size smaller than the
@@ -761,19 +763,25 @@ EXPORT_SYMBOL(bio_add_pc_page);
* Return %true on success or %false on failure.
*/
bool __bio_try_merge_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off)
+ unsigned int len, unsigned int off, bool same_page)
{
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+ phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) +
+ bv->bv_offset + bv->bv_len - 1;
+ phys_addr_t page_addr = page_to_phys(page);
- if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
- bv->bv_len += len;
- bio->bi_iter.bi_size += len;
- return true;
- }
+ if (vec_end_addr + 1 != page_addr + off)
+ return false;
+ if (same_page && (vec_end_addr & PAGE_MASK) != page_addr)
+ return false;
+
+ bv->bv_len += len;
+ bio->bi_iter.bi_size += len;
+ return true;
}
return false;
}
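
For reference, the merge test above now works on physical addresses rather than on struct page identity: the new data must start at the byte immediately following the current end of the last bvec, and with @same_page it must additionally fall within the same physical page. A minimal sketch of those two checks, factored into a hypothetical helper (not part of the patch):

static bool bvec_can_merge(const struct bio_vec *bv, struct page *page,
			   unsigned int off, bool same_page)
{
	/* Physical address of the last byte currently held by the bvec. */
	phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) +
			bv->bv_offset + bv->bv_len - 1;
	phys_addr_t page_addr = page_to_phys(page);

	/* New data must begin exactly one byte past the current end. */
	if (vec_end_addr + 1 != page_addr + off)
		return false;
	/* With same_page, it must also stay inside that physical page. */
	if (same_page && (vec_end_addr & PAGE_MASK) != page_addr)
		return false;
	return true;
}

Comparing physical addresses instead of requiring the same struct page is what lets physically contiguous data spanning adjacent pages share a single bvec.
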
@@ -819,7 +827,7 @@ EXPORT_SYMBOL_GPL(__bio_add_page);
int bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
- if (!__bio_try_merge_page(bio, page, len, offset)) {
+ if (!__bio_try_merge_page(bio, page, len, offset, false)) {
if (bio_full(bio))
return 0;
__bio_add_page(bio, page, len, offset);
@@ -828,6 +836,40 @@ int bio_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_add_page);
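
At the call sites nothing changes: bio_add_page() still returns the number of bytes added, or 0 when the bio is full and no merge is possible. A usage sketch of the sub-page merging described in the kerneldoc above (hypothetical caller, assuming blksize divides PAGE_SIZE):

/* Hypothetical caller: add one page worth of blksize-sized blocks.  Every
 * call after the first merges into the previous bvec, so the page ends up
 * as a single segment rather than PAGE_SIZE / blksize segments. */
static int add_page_blocks(struct bio *bio, struct page *page,
			   unsigned int blksize)
{
	unsigned int off;

	for (off = 0; off < PAGE_SIZE; off += blksize) {
		if (bio_add_page(bio, page, blksize, off) != blksize)
			return -EIO;	/* bio already full */
	}
	return 0;
}
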
+static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
+{
+ const struct bio_vec *bv = iter->bvec;
+ unsigned int len;
+ size_t size;
+
+ if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len))
+ return -EINVAL;
+
+ len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
+ size = bio_add_page(bio, bv->bv_page, len,
+ bv->bv_offset + iter->iov_offset);
+ if (size == len) {
+ struct page *page;
+ int i;
+
+ /*
+ * For the normal O_DIRECT case, we could skip grabbing this
+ * reference and then not have to put them again when IO
+ * completes. But this breaks some in-kernel users, like
+ * splicing to/from a loop device, where we release the pipe
+ * pages unconditionally. If we can fix that case, we can
+ * get rid of the get here and the need to call
+ * bio_release_pages() at IO completion time.
+ */
+ mp_bvec_for_each_page(page, bv, i)
+ get_page(page);
+ iov_iter_advance(iter, size);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
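
The get_page() loop above takes one reference per small page covered by the bvec (a bvec may span more than one page here). As an illustration of that traversal, a hypothetical helper (not in the patch) that counts the pages behind one bvec:

static unsigned int mp_bvec_count_pages(const struct bio_vec *bv)
{
	struct page *page;
	unsigned int nr = 0;
	int i;

	/* Same walk __bio_iov_bvec_add_pages() uses for get_page(). */
	mp_bvec_for_each_page(page, bv, i)
		nr++;
	return nr;
}
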
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -876,23 +918,35 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
}
/**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * bio_iov_iter_get_pages - add user or kernel pages to a bio
* @bio: bio to add pages to
- * @iter: iov iterator describing the region to be mapped
+ * @iter: iov iterator describing the region to be added
+ *
+ * This takes either an iterator pointing to user memory, or one pointing to
+ * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
+ * map them into the kernel. On IO completion, the caller should put those
+ * pages. For now, when adding kernel pages, we still grab a reference to the
+ * page. This isn't strictly needed for the common case, but some call paths
+ * end up releasing pages from eg a pipe and we can't easily control these.
+ * See comment in __bio_iov_bvec_add_pages().
*
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
* The function tries, but does not guarantee, to pin as many pages as
- * fit into the bio, or are requested in *iter, whatever is smaller.
- * If MM encounters an error pinning the requested pages, it stops.
- * Error is returned only if 0 pages could be pinned.
+ * fit into the bio, or are requested in *iter, whatever is smaller. If
+ * MM encounters an error pinning the requested pages, it stops. Error
+ * is returned only if 0 pages could be pinned.
*/
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
+ const bool is_bvec = iov_iter_is_bvec(iter);
unsigned short orig_vcnt = bio->bi_vcnt;
do {
- int ret = __bio_iov_iter_get_pages(bio, iter);
+ int ret;
+
+ if (is_bvec)
+ ret = __bio_iov_bvec_add_pages(bio, iter);
+ else
+ ret = __bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret))
return bio->bi_vcnt > orig_vcnt ? 0 : ret;
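
With that split the same entry point serves both user iovecs and in-kernel bvec iterators: user pages are pinned as before, while an ITER_BVEC iterator is routed through __bio_iov_bvec_add_pages(). A hedged sketch of the kernel-pages side (names are illustrative; assumes the iov_iter_bvec() signature of this kernel generation):

static int kernel_bvecs_to_bio(struct bio *bio, struct bio_vec *bvecs,
			       unsigned int nr_bvecs, size_t bytes)
{
	struct iov_iter iter;

	/* Build an ITER_BVEC iterator over pages we already hold, so
	 * bio_iov_iter_get_pages() reuses them instead of pinning user
	 * memory. */
	iov_iter_bvec(&iter, WRITE, bvecs, nr_bvecs, bytes);
	return bio_iov_iter_get_pages(bio, &iter);
}

On the user-memory side the contract is unchanged: the pages are pinned here and the caller puts them at IO completion.
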
@@ -1072,8 +1126,9 @@ static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
int i;
struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i, iter_all) {
ssize_t ret;
ret = copy_page_from_iter(bvec->bv_page,
@@ -1103,8 +1158,9 @@ static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
int i;
struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i, iter_all) {
ssize_t ret;
ret = copy_page_to_iter(bvec->bv_page,
@@ -1126,8 +1182,9 @@ void bio_free_pages(struct bio *bio)
{
struct bio_vec *bvec;
int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i)
+ bio_for_each_segment_all(bvec, bio, i, iter_all)
__free_page(bvec->bv_page);
}
EXPORT_SYMBOL(bio_free_pages);
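
The bio_for_each_segment_all() conversions here and in the hunks that follow are all the same mechanical change: callers now supply a struct bvec_iter_all, presumably so the macro can keep per-page state and hand back one single-page segment at a time even when a bvec covers several pages. A minimal sketch of the updated calling convention (hypothetical helper, mirroring the bio_release_pages() hunk further down):

static void put_bio_pages(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;
	int i;

	/* iter_all is on-stack iteration state used by the macro. */
	bio_for_each_segment_all(bvec, bio, i, iter_all)
		put_page(bvec->bv_page);
}
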
@@ -1295,6 +1352,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
struct bio *bio;
int ret;
struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
if (!iov_iter_count(iter))
return ERR_PTR(-EINVAL);
@@ -1368,7 +1426,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
return bio;
out_unmap:
- bio_for_each_segment_all(bvec, bio, j) {
+ bio_for_each_segment_all(bvec, bio, j, iter_all) {
put_page(bvec->bv_page);
}
bio_put(bio);
@@ -1379,11 +1437,12 @@ static void __bio_unmap_user(struct bio *bio)
{
struct bio_vec *bvec;
int i;
+ struct bvec_iter_all iter_all;
/*
* make sure we dirty pages we wrote to
*/
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i, iter_all) {
if (bio_data_dir(bio) == READ)
set_page_dirty_lock(bvec->bv_page);
@@ -1475,8 +1534,9 @@ static void bio_copy_kern_endio_read(struct bio *bio)
char *p = bio->bi_private;
struct bio_vec *bvec;
int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i, iter_all) {
memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
p += bvec->bv_len;
}
@@ -1585,8 +1645,9 @@ void bio_set_pages_dirty(struct bio *bio)
{
struct bio_vec *bvec;
int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i, iter_all) {
if (!PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
}
@@ -1596,8 +1657,9 @@ static void bio_release_pages(struct bio *bio)
{
struct bio_vec *bvec;
int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i)
+ bio_for_each_segment_all(bvec, bio, i, iter_all)
put_page(bvec->bv_page);
}
@@ -1644,8 +1706,9 @@ void bio_check_pages_dirty(struct bio *bio)
struct bio_vec *bvec;
unsigned long flags;
int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i, iter_all) {
if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
goto defer;
}