diff options
Diffstat (limited to 'fs/read_write.c')
-rw-r--r-- | fs/read_write.c | 433 |
1 files changed, 71 insertions, 362 deletions
diff --git a/fs/read_write.c b/fs/read_write.c index 016444255d3e..75f764b43418 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -419,27 +419,42 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo return ret; } +static int warn_unsupported(struct file *file, const char *op) +{ + pr_warn_ratelimited( + "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n", + op, file, current->pid, current->comm); + return -EINVAL; +} + ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs = get_fs(); + struct kvec iov = { + .iov_base = buf, + .iov_len = min_t(size_t, count, MAX_RW_COUNT), + }; + struct kiocb kiocb; + struct iov_iter iter; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) return -EINVAL; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; + /* + * Also fail if ->read_iter and ->read are both wired up as that + * implies very convoluted semantics. + */ + if (unlikely(!file->f_op->read_iter || file->f_op->read)) + return warn_unsupported(file, "read"); - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - set_fs(KERNEL_DS); - if (file->f_op->read) - ret = file->f_op->read(file, (void __user *)buf, count, pos); - else if (file->f_op->read_iter) - ret = new_sync_read(file, (void __user *)buf, count, pos); - else - ret = -EINVAL; - set_fs(old_fs); + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = pos ? *pos : 0; + iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); + ret = file->f_op->read_iter(&kiocb, &iter); if (ret > 0) { + if (pos) + *pos = kiocb.ki_pos; fsnotify_access(file); add_rchar(current, ret); } @@ -510,28 +525,32 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t /* caller is responsible for file_start_write/file_end_write */ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs; - const char __user *p; + struct kvec iov = { + .iov_base = (void *)buf, + .iov_len = min_t(size_t, count, MAX_RW_COUNT), + }; + struct kiocb kiocb; + struct iov_iter iter; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; + /* + * Also fail if ->write_iter and ->write are both wired up as that + * implies very convoluted semantics. + */ + if (unlikely(!file->f_op->write_iter || file->f_op->write)) + return warn_unsupported(file, "write"); - old_fs = get_fs(); - set_fs(KERNEL_DS); - p = (__force const char __user *)buf; - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - if (file->f_op->write) - ret = file->f_op->write(file, p, count, pos); - else if (file->f_op->write_iter) - ret = new_sync_write(file, p, count, pos); - else - ret = -EINVAL; - set_fs(old_fs); + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = pos ? *pos : 0; + iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); + ret = file->f_op->write_iter(&kiocb, &iter); if (ret > 0) { + if (pos) + *pos = kiocb.ki_pos; fsnotify_modify(file); add_wchar(current, ret); } @@ -760,185 +779,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, return ret; } -/** - * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace - * into the kernel and check that it is valid. - * - * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE. - * @uvector: Pointer to the userspace array. - * @nr_segs: Number of elements in userspace array. - * @fast_segs: Number of elements in @fast_pointer. - * @fast_pointer: Pointer to (usually small on-stack) kernel array. - * @ret_pointer: (output parameter) Pointer to a variable that will point to - * either @fast_pointer, a newly allocated kernel array, or NULL, - * depending on which array was used. - * - * This function copies an array of &struct iovec of @nr_segs from - * userspace into the kernel and checks that each element is valid (e.g. - * it does not point to a kernel address or cause overflow by being too - * large, etc.). - * - * As an optimization, the caller may provide a pointer to a small - * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long - * (the size of this array, or 0 if unused, should be given in @fast_segs). - * - * @ret_pointer will always point to the array that was used, so the - * caller must take care not to call kfree() on it e.g. in case the - * @fast_pointer array was used and it was allocated on the stack. - * - * Return: The total number of bytes covered by the iovec array on success - * or a negative error code on error. - */ -ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, - unsigned long nr_segs, unsigned long fast_segs, - struct iovec *fast_pointer, - struct iovec **ret_pointer) -{ - unsigned long seg; - ssize_t ret; - struct iovec *iov = fast_pointer; - - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - if (nr_segs == 0) { - ret = 0; - goto out; - } - - /* - * First get the "struct iovec" from user memory and - * verify all the pointers - */ - if (nr_segs > UIO_MAXIOV) { - ret = -EINVAL; - goto out; - } - if (nr_segs > fast_segs) { - iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); - if (iov == NULL) { - ret = -ENOMEM; - goto out; - } - } - if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { - ret = -EFAULT; - goto out; - } - - /* - * According to the Single Unix Specification we should return EINVAL - * if an element length is < 0 when cast to ssize_t or if the - * total length would overflow the ssize_t return value of the - * system call. - * - * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the - * overflow case. - */ - ret = 0; - for (seg = 0; seg < nr_segs; seg++) { - void __user *buf = iov[seg].iov_base; - ssize_t len = (ssize_t)iov[seg].iov_len; - - /* see if we we're about to use an invalid len or if - * it's about to overflow ssize_t */ - if (len < 0) { - ret = -EINVAL; - goto out; - } - if (type >= 0 - && unlikely(!access_ok(buf, len))) { - ret = -EFAULT; - goto out; - } - if (len > MAX_RW_COUNT - ret) { - len = MAX_RW_COUNT - ret; - iov[seg].iov_len = len; - } - ret += len; - } -out: - *ret_pointer = iov; - return ret; -} - -#ifdef CONFIG_COMPAT -ssize_t compat_rw_copy_check_uvector(int type, - const struct compat_iovec __user *uvector, unsigned long nr_segs, - unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer) -{ - compat_ssize_t tot_len; - struct iovec *iov = *ret_pointer = fast_pointer; - ssize_t ret = 0; - int seg; - - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - if (nr_segs == 0) - goto out; - - ret = -EINVAL; - if (nr_segs > UIO_MAXIOV) - goto out; - if (nr_segs > fast_segs) { - ret = -ENOMEM; - iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); - if (iov == NULL) - goto out; - } - *ret_pointer = iov; - - ret = -EFAULT; - if (!access_ok(uvector, nr_segs*sizeof(*uvector))) - goto out; - - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. - * - * In Linux, the total length is limited to MAX_RW_COUNT, there is - * no overflow possibility. - */ - tot_len = 0; - ret = -EINVAL; - for (seg = 0; seg < nr_segs; seg++) { - compat_uptr_t buf; - compat_ssize_t len; - - if (__get_user(len, &uvector->iov_len) || - __get_user(buf, &uvector->iov_base)) { - ret = -EFAULT; - goto out; - } - if (len < 0) /* size_t not fitting in compat_ssize_t .. */ - goto out; - if (type >= 0 && - !access_ok(compat_ptr(buf), len)) { - ret = -EFAULT; - goto out; - } - if (len > MAX_RW_COUNT - tot_len) - len = MAX_RW_COUNT - tot_len; - tot_len += len; - iov->iov_base = compat_ptr(buf); - iov->iov_len = (compat_size_t) len; - uvector++; - iov++; - } - ret = tot_len; - -out: - return ret; -} -#endif - static ssize_t do_iter_read(struct file *file, struct iov_iter *iter, loff_t *pos, rwf_t flags) { @@ -1068,7 +908,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, } EXPORT_SYMBOL(vfs_iter_write); -ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, +static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; @@ -1255,224 +1095,93 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, return do_pwritev(fd, vec, vlen, pos, flags); } +/* + * Various compat syscalls. Note that they all pretend to take a native + * iovec - import_iovec will properly treat those as compat_iovecs based on + * in_compat_syscall(). + */ #ifdef CONFIG_COMPAT -static size_t compat_readv(struct file *file, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos, rwf_t flags) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - ssize_t ret; - - ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter); - if (ret >= 0) { - ret = do_iter_read(file, &iter, pos, flags); - kfree(iov); - } - if (ret > 0) - add_rchar(current, ret); - inc_syscr(current); - return ret; -} - -static size_t do_compat_readv(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, rwf_t flags) -{ - struct fd f = fdget_pos(fd); - ssize_t ret; - loff_t pos; - - if (!f.file) - return -EBADF; - pos = f.file->f_pos; - ret = compat_readv(f.file, vec, vlen, &pos, flags); - if (ret >= 0) - f.file->f_pos = pos; - fdput_pos(f); - return ret; - -} - -COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, - const struct compat_iovec __user *,vec, - compat_ulong_t, vlen) -{ - return do_compat_readv(fd, vec, vlen, 0); -} - -static long do_compat_preadv64(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, rwf_t flags) -{ - struct fd f; - ssize_t ret; - - if (pos < 0) - return -EINVAL; - f = fdget(fd); - if (!f.file) - return -EBADF; - ret = -ESPIPE; - if (f.file->f_mode & FMODE_PREAD) - ret = compat_readv(f.file, vec, vlen, &pos, flags); - fdput(f); - return ret; -} - #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, - const struct compat_iovec __user *,vec, + const struct iovec __user *, vec, unsigned long, vlen, loff_t, pos) { - return do_compat_preadv64(fd, vec, vlen, pos, 0); + return do_preadv(fd, vec, vlen, pos, 0); } #endif COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, - const struct compat_iovec __user *,vec, + const struct iovec __user *, vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return do_compat_preadv64(fd, vec, vlen, pos, 0); + return do_preadv(fd, vec, vlen, pos, 0); } #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, - const struct compat_iovec __user *,vec, + const struct iovec __user *, vec, unsigned long, vlen, loff_t, pos, rwf_t, flags) { if (pos == -1) - return do_compat_readv(fd, vec, vlen, flags); - - return do_compat_preadv64(fd, vec, vlen, pos, flags); + return do_readv(fd, vec, vlen, flags); + return do_preadv(fd, vec, vlen, pos, flags); } #endif COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, - const struct compat_iovec __user *,vec, + const struct iovec __user *, vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; if (pos == -1) - return do_compat_readv(fd, vec, vlen, flags); - - return do_compat_preadv64(fd, vec, vlen, pos, flags); -} - -static size_t compat_writev(struct file *file, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos, rwf_t flags) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - ssize_t ret; - - ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter); - if (ret >= 0) { - file_start_write(file); - ret = do_iter_write(file, &iter, pos, flags); - file_end_write(file); - kfree(iov); - } - if (ret > 0) - add_wchar(current, ret); - inc_syscw(current); - return ret; -} - -static size_t do_compat_writev(compat_ulong_t fd, - const struct compat_iovec __user* vec, - compat_ulong_t vlen, rwf_t flags) -{ - struct fd f = fdget_pos(fd); - ssize_t ret; - loff_t pos; - - if (!f.file) - return -EBADF; - pos = f.file->f_pos; - ret = compat_writev(f.file, vec, vlen, &pos, flags); - if (ret >= 0) - f.file->f_pos = pos; - fdput_pos(f); - return ret; -} - -COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, - const struct compat_iovec __user *, vec, - compat_ulong_t, vlen) -{ - return do_compat_writev(fd, vec, vlen, 0); -} - -static long do_compat_pwritev64(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, rwf_t flags) -{ - struct fd f; - ssize_t ret; - - if (pos < 0) - return -EINVAL; - f = fdget(fd); - if (!f.file) - return -EBADF; - ret = -ESPIPE; - if (f.file->f_mode & FMODE_PWRITE) - ret = compat_writev(f.file, vec, vlen, &pos, flags); - fdput(f); - return ret; + return do_readv(fd, vec, vlen, flags); + return do_preadv(fd, vec, vlen, pos, flags); } #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, - const struct compat_iovec __user *,vec, + const struct iovec __user *, vec, unsigned long, vlen, loff_t, pos) { - return do_compat_pwritev64(fd, vec, vlen, pos, 0); + return do_pwritev(fd, vec, vlen, pos, 0); } #endif COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, - const struct compat_iovec __user *,vec, + const struct iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return do_compat_pwritev64(fd, vec, vlen, pos, 0); + return do_pwritev(fd, vec, vlen, pos, 0); } #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, - const struct compat_iovec __user *,vec, + const struct iovec __user *, vec, unsigned long, vlen, loff_t, pos, rwf_t, flags) { if (pos == -1) - return do_compat_writev(fd, vec, vlen, flags); - - return do_compat_pwritev64(fd, vec, vlen, pos, flags); + return do_writev(fd, vec, vlen, flags); + return do_pwritev(fd, vec, vlen, pos, flags); } #endif COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, - const struct compat_iovec __user *,vec, + const struct iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; if (pos == -1) - return do_compat_writev(fd, vec, vlen, flags); - - return do_compat_pwritev64(fd, vec, vlen, pos, flags); + return do_writev(fd, vec, vlen, flags); + return do_pwritev(fd, vec, vlen, pos, flags); } - -#endif +#endif /* CONFIG_COMPAT */ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) |