Diffstat (limited to 'include/linux/uio.h')
-rw-r--r--  include/linux/uio.h | 318 ++++++++++++++++++++++++++++++++---------------
1 file changed, 227 insertions(+), 91 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 72d88566694e..2e86c653186c 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -6,11 +6,14 @@
 #define __LINUX_UIO_H
 
 #include <linux/kernel.h>
-#include <linux/thread_info.h>
+#include <linux/mm_types.h>
+#include <linux/ucopysize.h>
 #include <uapi/linux/uio.h>
 
 struct page;
-struct pipe_inode_info;
+struct folio_queue;
+
+typedef unsigned int __bitwise iov_iter_extraction_t;
 
 struct kvec {
 	void *iov_base; /* and that should *never* hold a userland pointer */
@@ -19,40 +22,107 @@ struct kvec {
 
 enum iter_type {
 	/* iter types */
-	ITER_IOVEC = 4,
-	ITER_KVEC = 8,
-	ITER_BVEC = 16,
-	ITER_PIPE = 32,
-	ITER_DISCARD = 64,
+	ITER_UBUF,
+	ITER_IOVEC,
+	ITER_BVEC,
+	ITER_KVEC,
+	ITER_FOLIOQ,
+	ITER_XARRAY,
+	ITER_DISCARD,
+};
+
+#define ITER_SOURCE	1	// == WRITE
+#define ITER_DEST	0	// == READ
+
+struct iov_iter_state {
+	size_t iov_offset;
+	size_t count;
+	unsigned long nr_segs;
 };
 
 struct iov_iter {
+	u8 iter_type;
+	bool nofault;
+	bool data_source;
+	size_t iov_offset;
 	/*
-	 * Bit 0 is the read/write bit, set if we're writing.
-	 * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
-	 * the caller isn't expecting to drop a page reference when done.
+	 * Hack alert: overlay ubuf_iovec with iovec + count, so
+	 * that the members resolve correctly regardless of the type
+	 * of iterator used. This means that you can use:
+	 *
+	 * &iter->__ubuf_iovec or iter->__iov
+	 *
+	 * interchangeably for the user_backed cases, hence simplifying
+	 * some of the cases that need to deal with both.
 	 */
-	unsigned int type;
-	size_t iov_offset;
-	size_t count;
 	union {
-		const struct iovec *iov;
-		const struct kvec *kvec;
-		const struct bio_vec *bvec;
-		struct pipe_inode_info *pipe;
+		/*
+		 * This really should be a const, but we cannot do that without
+		 * also modifying any of the zero-filling iter init functions.
+		 * Leave it non-const for now, but it should be treated as such.
+		 */
+		struct iovec __ubuf_iovec;
+		struct {
+			union {
+				/* use iter_iov() to get the current vec */
+				const struct iovec *__iov;
+				const struct kvec *kvec;
+				const struct bio_vec *bvec;
+				const struct folio_queue *folioq;
+				struct xarray *xarray;
+				void __user *ubuf;
+			};
+			size_t count;
+		};
 	};
 	union {
 		unsigned long nr_segs;
-		struct {
-			unsigned int head;
-			unsigned int start_head;
-		};
+		u8 folioq_slot;
+		loff_t xarray_start;
 	};
 };
 
+typedef __u16 uio_meta_flags_t;
+
+struct uio_meta {
+	uio_meta_flags_t flags;
+	u16 app_tag;
+	u64 seed;
+	struct iov_iter iter;
+};
+
+static inline const struct iovec *iter_iov(const struct iov_iter *iter)
+{
+	if (iter->iter_type == ITER_UBUF)
+		return (const struct iovec *) &iter->__ubuf_iovec;
+	return iter->__iov;
+}
+
+#define iter_iov_addr(iter)	(iter_iov(iter)->iov_base + (iter)->iov_offset)
+
+static inline size_t iter_iov_len(const struct iov_iter *i)
+{
+	if (i->iter_type == ITER_UBUF)
+		return i->count;
+	return iter_iov(i)->iov_len - i->iov_offset;
+}
+
 static inline enum iter_type iov_iter_type(const struct iov_iter *i)
 {
-	return i->type & ~(READ | WRITE);
+	return i->iter_type;
+}
+
+static inline void iov_iter_save_state(struct iov_iter *iter,
+				       struct iov_iter_state *state)
+{
+	state->iov_offset = iter->iov_offset;
+	state->count = iter->count;
+	state->nr_segs = iter->nr_segs;
+}
+
+static inline bool iter_is_ubuf(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_UBUF;
 }
 
 static inline bool iter_is_iovec(const struct iov_iter *i)
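[Editor's note] The __ubuf_iovec overlay in the hunk above is what lets iter_iov(), iter_iov_addr() and iter_iov_len() resolve uniformly for ITER_UBUF and ITER_IOVEC iterators. As a rough sketch of how a consumer might walk a user-backed iterator with these accessors (walk_user_segments() is a hypothetical helper, not part of this header):

/* Hypothetical example: step through a user-backed iterator one segment
 * at a time.  For ITER_UBUF, iter_iov_len() is simply the remaining
 * count; for ITER_IOVEC it is the remainder of the current iovec.
 */
static void walk_user_segments(struct iov_iter *iter)
{
	while (iov_iter_count(iter)) {
		void __user *p = iter_iov_addr(iter);	/* current address */
		size_t len = iter_iov_len(iter);	/* bytes left in segment */

		/* ... hand (p, len) to whatever consumes the data ... */
		iov_iter_advance(iter, len);
	}
}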
@@ -70,19 +140,29 @@ static inline bool iov_iter_is_bvec(const struct iov_iter *i)
 	return iov_iter_type(i) == ITER_BVEC;
 }
 
-static inline bool iov_iter_is_pipe(const struct iov_iter *i)
+static inline bool iov_iter_is_discard(const struct iov_iter *i)
 {
-	return iov_iter_type(i) == ITER_PIPE;
+	return iov_iter_type(i) == ITER_DISCARD;
 }
 
-static inline bool iov_iter_is_discard(const struct iov_iter *i)
+static inline bool iov_iter_is_folioq(const struct iov_iter *i)
 {
-	return iov_iter_type(i) == ITER_DISCARD;
+	return iov_iter_type(i) == ITER_FOLIOQ;
+}
+
+static inline bool iov_iter_is_xarray(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_XARRAY;
 }
 
 static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 {
-	return i->type & (READ | WRITE);
+	return i->data_source ? WRITE : READ;
+}
+
+static inline bool user_backed_iter(const struct iov_iter *i)
+{
+	return iter_is_ubuf(i) || iter_is_iovec(i);
 }
 
 /*
@@ -102,81 +182,95 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
 	return ret;
 }
 
-static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
-{
-	return (struct iovec) {
-		.iov_base = iter->iov->iov_base + iter->iov_offset,
-		.iov_len = min(iter->count,
-			       iter->iov->iov_len - iter->iov_offset),
-	};
-}
-
-size_t iov_iter_copy_from_user_atomic(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
+size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
+size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
+size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset,
+				   size_t bytes, struct iov_iter *i);
 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
-bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
-bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
+
+static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
+		size_t bytes, struct iov_iter *i)
+{
+	return copy_page_to_iter(&folio->page, offset, bytes, i);
+}
+
+static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset,
+					  size_t bytes, struct iov_iter *i)
+{
+	return copy_page_from_iter(&folio->page, offset, bytes, i);
+}
+
+size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
+				 size_t bytes, struct iov_iter *i);
 
 static __always_inline __must_check
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
-	if (unlikely(!check_copy_size(addr, bytes, true)))
-		return 0;
-	else
+	if (check_copy_size(addr, bytes, true))
 		return _copy_to_iter(addr, bytes, i);
+	return 0;
 }
 
 static __always_inline __must_check
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
-	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return 0;
-	else
+	if (check_copy_size(addr, bytes, false))
 		return _copy_from_iter(addr, bytes, i);
+	return 0;
+}
+
+static __always_inline __must_check
+bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i)
+{
+	size_t copied = copy_to_iter(addr, bytes, i);
+	if (likely(copied == bytes))
+		return true;
+	iov_iter_revert(i, copied);
+	return false;
 }
 
 static __always_inline __must_check
 bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 {
-	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return false;
-	else
-		return _copy_from_iter_full(addr, bytes, i);
+	size_t copied = copy_from_iter(addr, bytes, i);
+	if (likely(copied == bytes))
+		return true;
+	iov_iter_revert(i, copied);
+	return false;
 }
 
 static __always_inline __must_check
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
-	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return 0;
-	else
+	if (check_copy_size(addr, bytes, false))
 		return _copy_from_iter_nocache(addr, bytes, i);
+	return 0;
 }
 
 static __always_inline __must_check
 bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
-	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return false;
-	else
-		return _copy_from_iter_full_nocache(addr, bytes, i);
+	size_t copied = copy_from_iter_nocache(addr, bytes, i);
+	if (likely(copied == bytes))
+		return true;
+	iov_iter_revert(i, copied);
+	return false;
}
 
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /*
  * Note, users like pmem that depend on the stricter semantics of
- * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
+ * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for
  * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
  * destination is flushed from the cache on return.
  */
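[Editor's note] The *_full() variants added above are all-or-nothing: a short copy is undone with iov_iter_revert() and reported as false, so callers no longer have to unwind partial progress themselves. A minimal sketch under that assumption (the example_hdr struct and function are invented here for illustration):

/* Hypothetical example: pull a fixed-size header out of an iterator.
 * copy_from_iter_full() either consumes exactly sizeof(*hdr) bytes or
 * leaves the iterator position untouched.
 */
struct example_hdr {
	__le32 magic;
	__le32 payload_len;
};

static int example_read_hdr(struct example_hdr *hdr, struct iov_iter *from)
{
	if (!copy_from_iter_full(hdr, sizeof(*hdr), from))
		return -EFAULT;		/* nothing was consumed */
	return 0;
}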
@@ -191,25 +285,9 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 #define _copy_mc_to_iter _copy_to_iter
 #endif
 
-static __always_inline __must_check
-size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return 0;
-	else
-		return _copy_from_iter_flushcache(addr, bytes, i);
-}
-
-static __always_inline __must_check
-size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (unlikely(!check_copy_size(addr, bytes, true)))
-		return 0;
-	else
-		return _copy_mc_to_iter(addr, bytes, i);
-}
-
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
+bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
+			unsigned len_mask);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
@@ -218,14 +296,18 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec
 			unsigned long nr_segs, size_t count);
 void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
 			unsigned long nr_segs, size_t count);
-void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
-			size_t count);
 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
-ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
+void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction,
+			  const struct folio_queue *folioq,
+			  unsigned int first_slot, unsigned int offset, size_t count);
+void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
+		     loff_t start, size_t count);
+ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
-ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
+ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
 
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
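[Editor's note] iov_iter_restore() declared above pairs with iov_iter_save_state() from the earlier hunk: snapshot the offset/count/segment position, attempt an operation that may consume part of the iterator, and rewind on failure. A sketch, where submit_once() is a stand-in for the caller's real work:

/* Hypothetical example: rewind an iterator after a failed nowait
 * attempt so the operation can be retried from the same position.
 */
static ssize_t submit_once(struct iov_iter *iter);	/* stand-in */

static ssize_t example_retry(struct iov_iter *iter)
{
	struct iov_iter_state state;
	ssize_t ret;

	iov_iter_save_state(iter, &state);
	ret = submit_once(iter);
	if (ret == -EAGAIN)
		iov_iter_restore(iter, &state);	/* back to the snapshot */
	return ret;
}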
@@ -260,11 +342,23 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 {
 	i->count = count;
 }
-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i);
-size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
-bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
-size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
-			struct iov_iter *i);
+
+static inline int
+iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes)
+{
+	size_t shorted = 0;
+	int npages;
+
+	if (iov_iter_count(i) > max_bytes) {
+		shorted = iov_iter_count(i) - max_bytes;
+		iov_iter_truncate(i, max_bytes);
+	}
+	npages = iov_iter_npages(i, maxpages);
+	if (shorted)
+		iov_iter_reexpand(i, iov_iter_count(i) + shorted);
+
+	return npages;
+}
 
 struct iovec *iovec_from_user(const struct iovec __user *uvector,
 		unsigned long nr_segs, unsigned long fast_segs,
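[Editor's note] iov_iter_npages_cap() above temporarily truncates the iterator so the page count covers only the first max_bytes, then reexpands it before returning. Combined with iov_iter_is_aligned() from the previous hunk, a caller can validate and size a capped submission without mutating the iterator. A sketch (the 512-byte alignment and 256-page cap are illustrative choices, not requirements of the API):

/* Hypothetical example: reject misaligned direct I/O up front, then
 * size the first submission chunk.  The iterator's count is unchanged
 * on return.
 */
static int example_prep_dio(struct iov_iter *iter, size_t chunk_bytes)
{
	if (!iov_iter_is_aligned(iter, 511, 511))	/* 512-byte DIO */
		return -EINVAL;
	return iov_iter_npages_cap(iter, 256, chunk_bytes);
}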
@@ -275,11 +369,53 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec,
 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
 		 struct iov_iter *i, bool compat);
-int import_single_range(int type, void __user *buf, size_t len,
-		struct iovec *iov, struct iov_iter *i);
+int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i);
+
+static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
+				 void __user *buf, size_t count)
+{
+	WARN_ON(direction & ~(READ | WRITE));
+	*i = (struct iov_iter) {
+		.iter_type = ITER_UBUF,
+		.data_source = direction,
+		.ubuf = buf,
+		.count = count,
+		.nr_segs = 1
+	};
+}
+
+/* Flags for iov_iter_get/extract_pages*() */
+/* Allow P2PDMA on the extracted pages */
+#define ITER_ALLOW_P2PDMA	((__force iov_iter_extraction_t)0x01)
+
+ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages,
+			       size_t maxsize, unsigned int maxpages,
+			       iov_iter_extraction_t extraction_flags,
+			       size_t *offset0);
+
+/**
+ * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained
+ * @iter: The iterator
+ *
+ * Examine the iterator and indicate by returning true or false as to how, if
+ * at all, pages extracted from the iterator will be retained by the extraction
+ * function.
+ *
+ * %true indicates that the pages will have a pin placed in them that the
+ * caller must unpin.  This must be done for DMA/async DIO to force fork()
+ * to forcibly copy a page for the child (the parent must retain the original
+ * page).
+ *
+ * %false indicates that no measures are taken and that it's up to the caller
+ * to retain the pages.
+ */
+static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter)
+{
+	return user_backed_iter(iter);
+}
 
-int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
-			    int (*f)(struct kvec *vec, void *context),
-			    void *context);
+struct sg_table;
+ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len,
+			   struct sg_table *sgtable, unsigned int sg_max,
+			   iov_iter_extraction_t extraction_flags);
 
 #endif
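[Editor's note] The ITER_UBUF constructor and the extraction API in the final hunk combine as sketched below: build a single-buffer iterator, extract its pages, and drop the pins afterwards when iov_iter_extract_will_pin() says a pin was taken. This is only a sketch assuming a short, fully-resident buffer; example_extract() is invented here, and real users must also cope with partial extraction and loop as needed.

/* Hypothetical example: describe a user buffer with ITER_UBUF, extract
 * up to 16 pages for DMA, and release the pins when done.
 */
static ssize_t example_extract(void __user *buf, size_t len)
{
	struct page *pages[16], **ppages = pages;
	struct iov_iter iter;
	size_t offset0, npages, j;
	ssize_t n;

	iov_iter_ubuf(&iter, ITER_SOURCE, buf, len);	/* data flows out */
	n = iov_iter_extract_pages(&iter, &ppages, len, ARRAY_SIZE(pages),
				   0, &offset0);
	if (n <= 0)
		return n;

	/* ... program DMA over pages[], starting at offset0 ... */

	if (iov_iter_extract_will_pin(&iter)) {
		npages = DIV_ROUND_UP(offset0 + n, PAGE_SIZE);
		for (j = 0; j < npages; j++)
			unpin_user_page(pages[j]);
	}
	return n;
}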