From f6ef7b7bb285c7b63a36f370b40b990891a9aa1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:29 +0200 Subject: proc: remove a level of indentation in proc_get_inode Just return early on inode allocation failure. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/proc/inode.c | 72 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 28d6105e908e..016b1302cbab 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -619,42 +619,44 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { struct inode *inode = new_inode(sb); - if (inode) { - inode->i_ino = de->low_ino; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - PROC_I(inode)->pde = de; - - if (is_empty_pde(de)) { - make_empty_dir_inode(inode); - return inode; - } - if (de->mode) { - inode->i_mode = de->mode; - inode->i_uid = de->uid; - inode->i_gid = de->gid; - } - if (de->size) - inode->i_size = de->size; - if (de->nlink) - set_nlink(inode, de->nlink); - - if (S_ISREG(inode->i_mode)) { - inode->i_op = de->proc_iops; - inode->i_fop = &proc_reg_file_ops; + if (!inode) { + pde_put(de); + return NULL; + } + + inode->i_ino = de->low_ino; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + PROC_I(inode)->pde = de; + if (is_empty_pde(de)) { + make_empty_dir_inode(inode); + return inode; + } + + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + set_nlink(inode, de->nlink); + + if (S_ISREG(inode->i_mode)) { + inode->i_op = de->proc_iops; + inode->i_fop = &proc_reg_file_ops; #ifdef CONFIG_COMPAT - if (!de->proc_ops->proc_compat_ioctl) { - inode->i_fop = &proc_reg_file_ops_no_compat; - } + if (!de->proc_ops->proc_compat_ioctl) + inode->i_fop = &proc_reg_file_ops_no_compat; #endif - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = de->proc_iops; - inode->i_fop = de->proc_dir_ops; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = de->proc_iops; - inode->i_fop = NULL; - } else - BUG(); - } else - pde_put(de); + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = de->proc_iops; + inode->i_fop = de->proc_dir_ops; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = de->proc_iops; + inode->i_fop = NULL; + } else { + BUG(); + } return inode; } -- cgit v1.2.3-59-g8ed1b From 906146f44922392f00293e52ea9b61168b75ac62 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:30 +0200 Subject: proc: cleanup the compat vs no compat file ops Instead of providing a special no-compat version provide a special compat version for operations with ->compat_ioctl. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/proc/inode.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 016b1302cbab..93dd20457375 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -572,9 +572,6 @@ static const struct file_operations proc_reg_file_ops = { .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = proc_reg_compat_ioctl, -#endif .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, @@ -582,12 +579,13 @@ static const struct file_operations proc_reg_file_ops = { }; #ifdef CONFIG_COMPAT -static const struct file_operations proc_reg_file_ops_no_compat = { +static const struct file_operations proc_reg_file_ops_compat = { .llseek = proc_reg_llseek, .read = proc_reg_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, + .compat_ioctl = proc_reg_compat_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, @@ -646,8 +644,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_op = de->proc_iops; inode->i_fop = &proc_reg_file_ops; #ifdef CONFIG_COMPAT - if (!de->proc_ops->proc_compat_ioctl) - inode->i_fop = &proc_reg_file_ops_no_compat; + if (de->proc_ops->proc_compat_ioctl) + inode->i_fop = &proc_reg_file_ops_compat; #endif } else if (S_ISDIR(inode->i_mode)) { inode->i_op = de->proc_iops; -- cgit v1.2.3-59-g8ed1b From fd5a13f4893c8df2a5a3af8599adecb52d05fe89 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:31 +0200 Subject: proc: add a read_iter method to proc proc_ops This will allow proc files to implement iter read semantics. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/proc/inode.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/proc_fs.h | 1 + 2 files changed, 51 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 93dd20457375..58c075e2a452 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -297,6 +297,21 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) return rv; } +static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct proc_dir_entry *pde = PDE(file_inode(iocb->ki_filp)); + ssize_t ret; + + if (pde_is_permanent(pde)) + return pde->proc_ops->proc_read_iter(iocb, iter); + + if (!use_pde(pde)) + return -EIO; + ret = pde->proc_ops->proc_read_iter(iocb, iter); + unuse_pde(pde); + return ret; +} + static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos) { typeof_member(struct proc_ops, proc_read) read; @@ -578,6 +593,18 @@ static const struct file_operations proc_reg_file_ops = { .release = proc_reg_release, }; +static const struct file_operations proc_iter_file_ops = { + .llseek = proc_reg_llseek, + .read_iter = proc_reg_read_iter, + .write = proc_reg_write, + .poll = proc_reg_poll, + .unlocked_ioctl = proc_reg_unlocked_ioctl, + .mmap = proc_reg_mmap, + .get_unmapped_area = proc_reg_get_unmapped_area, + .open = proc_reg_open, + .release = proc_reg_release, +}; + #ifdef CONFIG_COMPAT static const struct file_operations proc_reg_file_ops_compat = { .llseek = proc_reg_llseek, @@ -591,6 +618,19 @@ static const struct file_operations proc_reg_file_ops_compat = { .open = proc_reg_open, .release = proc_reg_release, }; + +static const struct file_operations proc_iter_file_ops_compat = { + .llseek = proc_reg_llseek, + .read_iter = proc_reg_read_iter, + .write = proc_reg_write, + .poll = proc_reg_poll, + .unlocked_ioctl = proc_reg_unlocked_ioctl, + .compat_ioctl = proc_reg_compat_ioctl, + .mmap = proc_reg_mmap, + .get_unmapped_area = proc_reg_get_unmapped_area, + .open = proc_reg_open, + .release = proc_reg_release, +}; #endif static void proc_put_link(void *p) @@ -642,10 +682,17 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) if (S_ISREG(inode->i_mode)) { inode->i_op = de->proc_iops; - inode->i_fop = &proc_reg_file_ops; + if (de->proc_ops->proc_read_iter) + inode->i_fop = &proc_iter_file_ops; + else + inode->i_fop = &proc_reg_file_ops; #ifdef CONFIG_COMPAT - if (de->proc_ops->proc_compat_ioctl) - inode->i_fop = &proc_reg_file_ops_compat; + if (de->proc_ops->proc_compat_ioctl) { + if (de->proc_ops->proc_read_iter) + inode->i_fop = &proc_iter_file_ops_compat; + else + inode->i_fop = &proc_reg_file_ops_compat; + } #endif } else if (S_ISDIR(inode->i_mode)) { inode->i_op = de->proc_iops; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2df965cd0974..270cab43ca3d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -30,6 +30,7 @@ struct proc_ops { unsigned int proc_flags; int (*proc_open)(struct inode *, struct file *); ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *); + ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *); ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *); loff_t (*proc_lseek)(struct file *, loff_t, int); int (*proc_release)(struct inode *, struct file *); -- cgit v1.2.3-59-g8ed1b From 4bd6a7353ee14697fea645e941354976d2c4a452 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 3 Sep 2020 16:22:32 +0200 Subject: sysctl: Convert to iter interfaces Using the read_iter/write_iter interfaces allows for in-kernel users to set sysctls without using set_fs(). Also, the buffer is a string, so give it the real type of 'char *', not void *. [AV: Christoph's fixup folded in] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/proc/proc_sysctl.c | 48 +++++++++++++++++++++++----------------------- include/linux/bpf-cgroup.h | 2 +- kernel/bpf/cgroup.c | 2 +- 3 files changed, 26 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 6c1166ccdaea..317899222d7f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -540,13 +541,14 @@ out: return err; } -static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, - size_t count, loff_t *ppos, int write) +static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, + int write) { - struct inode *inode = file_inode(filp); + struct inode *inode = file_inode(iocb->ki_filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; - void *kbuf; + size_t count = iov_iter_count(iter); + char *kbuf; ssize_t error; if (IS_ERR(head)) @@ -569,32 +571,30 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, error = -ENOMEM; if (count >= KMALLOC_MAX_SIZE) goto out; + kbuf = kzalloc(count + 1, GFP_KERNEL); + if (!kbuf) + goto out; if (write) { - kbuf = memdup_user_nul(ubuf, count); - if (IS_ERR(kbuf)) { - error = PTR_ERR(kbuf); - goto out; - } - } else { - kbuf = kzalloc(count, GFP_KERNEL); - if (!kbuf) - goto out; + error = -EFAULT; + if (!copy_from_iter_full(kbuf, count, iter)) + goto out_free_buf; + kbuf[count] = '\0'; } error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count, - ppos); + &iocb->ki_pos); if (error) goto out_free_buf; /* careful: calling conventions are nasty here */ - error = table->proc_handler(table, write, kbuf, &count, ppos); + error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos); if (error) goto out_free_buf; if (!write) { error = -EFAULT; - if (copy_to_user(ubuf, kbuf, count)) + if (copy_to_iter(kbuf, count, iter) < count) goto out_free_buf; } @@ -607,16 +607,14 @@ out: return error; } -static ssize_t proc_sys_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t proc_sys_read(struct kiocb *iocb, struct iov_iter *iter) { - return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); + return proc_sys_call_handler(iocb, iter, 0); } -static ssize_t proc_sys_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t proc_sys_write(struct kiocb *iocb, struct iov_iter *iter) { - return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); + return proc_sys_call_handler(iocb, iter, 1); } static int proc_sys_open(struct inode *inode, struct file *filp) @@ -853,8 +851,10 @@ static int proc_sys_getattr(const struct path *path, struct kstat *stat, static const struct file_operations proc_sys_file_operations = { .open = proc_sys_open, .poll = proc_sys_poll, - .read = proc_sys_read, - .write = proc_sys_write, + .read_iter = proc_sys_read, + .write_iter = proc_sys_write, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .llseek = default_llseek, }; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 64f367044e25..82b26a1386d8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -136,7 +136,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - void **buf, size_t *pcount, loff_t *ppos, + char **buf, size_t *pcount, loff_t *ppos, enum bpf_attach_type type); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 83ff127ef7ae..226299352a76 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1226,7 +1226,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { */ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - void **buf, size_t *pcount, loff_t *ppos, + char **buf, size_t *pcount, loff_t *ppos, enum bpf_attach_type type) { struct bpf_sysctl_kern ctx = { -- cgit v1.2.3-59-g8ed1b From 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:33 +0200 Subject: fs: don't allow kernel reads and writes without iter ops Don't allow calling ->read or ->write with set_fs as a preparation for killing off set_fs. All the instances that we use kernel_read/write on are using the iter ops already. If a file has both the regular ->read/->write methods and the iter variants those could have different semantics for messed up enough drivers. Also fails the kernel access to them in that case. Signed-off-by: Christoph Hellwig Reviewed-by: Kees Cook Signed-off-by: Al Viro --- fs/read_write.c | 67 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index 5db58b8c78d0..702c4301d9eb 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -419,27 +419,41 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo return ret; } +static int warn_unsupported(struct file *file, const char *op) +{ + pr_warn_ratelimited( + "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n", + op, file, current->pid, current->comm); + return -EINVAL; +} + ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs = get_fs(); + struct kvec iov = { + .iov_base = buf, + .iov_len = min_t(size_t, count, MAX_RW_COUNT), + }; + struct kiocb kiocb; + struct iov_iter iter; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) return -EINVAL; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; + /* + * Also fail if ->read_iter and ->read are both wired up as that + * implies very convoluted semantics. + */ + if (unlikely(!file->f_op->read_iter || file->f_op->read)) + return warn_unsupported(file, "read"); - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - set_fs(KERNEL_DS); - if (file->f_op->read) - ret = file->f_op->read(file, (void __user *)buf, count, pos); - else if (file->f_op->read_iter) - ret = new_sync_read(file, (void __user *)buf, count, pos); - else - ret = -EINVAL; - set_fs(old_fs); + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *pos; + iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); + ret = file->f_op->read_iter(&kiocb, &iter); if (ret > 0) { + *pos = kiocb.ki_pos; fsnotify_access(file); add_rchar(current, ret); } @@ -510,28 +524,31 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t /* caller is responsible for file_start_write/file_end_write */ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { - mm_segment_t old_fs; - const char __user *p; + struct kvec iov = { + .iov_base = (void *)buf, + .iov_len = min_t(size_t, count, MAX_RW_COUNT), + }; + struct kiocb kiocb; + struct iov_iter iter; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; + /* + * Also fail if ->write_iter and ->write are both wired up as that + * implies very convoluted semantics. + */ + if (unlikely(!file->f_op->write_iter || file->f_op->write)) + return warn_unsupported(file, "write"); - old_fs = get_fs(); - set_fs(KERNEL_DS); - p = (__force const char __user *)buf; - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - if (file->f_op->write) - ret = file->f_op->write(file, p, count, pos); - else if (file->f_op->write_iter) - ret = new_sync_write(file, p, count, pos); - else - ret = -EINVAL; - set_fs(old_fs); + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *pos; + iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); + ret = file->f_op->write_iter(&kiocb, &iter); if (ret > 0) { + *pos = kiocb.ki_pos; fsnotify_modify(file); add_wchar(current, ret); } -- cgit v1.2.3-59-g8ed1b From 36e2c7421f02a22f71c9283e55fdb672a9eb58e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:34 +0200 Subject: fs: don't allow splice read/write without explicit ops default_file_splice_write is the last piece of generic code that uses set_fs to make the uaccess routines operate on kernel pointers. It implements a "fallback loop" for splicing from files that do not actually provide a proper splice_read method. The usual file systems and other high bandwidth instances all provide a ->splice_read, so this just removes support for various device drivers and procfs/debugfs files. If splice support for any of those turns out to be important it can be added back by switching them to the iter ops and using generic_file_splice_read. Signed-off-by: Christoph Hellwig Reviewed-by: Kees Cook Signed-off-by: Al Viro --- fs/read_write.c | 2 +- fs/splice.c | 130 ++++++----------------------------------------------- include/linux/fs.h | 2 - 3 files changed, 15 insertions(+), 119 deletions(-) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index 702c4301d9eb..8c61f67453e3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1077,7 +1077,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, } EXPORT_SYMBOL(vfs_iter_write); -ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, +static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; diff --git a/fs/splice.c b/fs/splice.c index d7c8a7c4db07..412df7b48f9e 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -342,89 +342,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = { }; EXPORT_SYMBOL(nosteal_pipe_buf_ops); -static ssize_t kernel_readv(struct file *file, const struct kvec *vec, - unsigned long vlen, loff_t offset) -{ - mm_segment_t old_fs; - loff_t pos = offset; - ssize_t res; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - /* The cast to a user pointer is valid due to the set_fs() */ - res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0); - set_fs(old_fs); - - return res; -} - -static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) -{ - struct kvec *vec, __vec[PIPE_DEF_BUFFERS]; - struct iov_iter to; - struct page **pages; - unsigned int nr_pages; - unsigned int mask; - size_t offset, base, copied = 0; - ssize_t res; - int i; - - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) - return -EAGAIN; - - /* - * Try to keep page boundaries matching to source pagecache ones - - * it probably won't be much help, but... - */ - offset = *ppos & ~PAGE_MASK; - - iov_iter_pipe(&to, READ, pipe, len + offset); - - res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base); - if (res <= 0) - return -ENOMEM; - - nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE); - - vec = __vec; - if (nr_pages > PIPE_DEF_BUFFERS) { - vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL); - if (unlikely(!vec)) { - res = -ENOMEM; - goto out; - } - } - - mask = pipe->ring_size - 1; - pipe->bufs[to.head & mask].offset = offset; - pipe->bufs[to.head & mask].len -= offset; - - for (i = 0; i < nr_pages; i++) { - size_t this_len = min_t(size_t, len, PAGE_SIZE - offset); - vec[i].iov_base = page_address(pages[i]) + offset; - vec[i].iov_len = this_len; - len -= this_len; - offset = 0; - } - - res = kernel_readv(in, vec, nr_pages, *ppos); - if (res > 0) { - copied = res; - *ppos += res; - } - - if (vec != __vec) - kfree(vec); -out: - for (i = 0; i < nr_pages; i++) - put_page(pages[i]); - kvfree(pages); - iov_iter_advance(&to, copied); /* truncates and discards */ - return res; -} - /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' * using sendpage(). Return the number of bytes sent. @@ -788,33 +705,6 @@ done: EXPORT_SYMBOL(iter_file_splice_write); -static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) -{ - int ret; - void *data; - loff_t tmp = sd->pos; - - data = kmap(buf->page); - ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); - kunmap(buf->page); - - return ret; -} - -static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, - struct file *out, loff_t *ppos, - size_t len, unsigned int flags) -{ - ssize_t ret; - - ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf); - if (ret > 0) - *ppos += ret; - - return ret; -} - /** * generic_splice_sendpage - splice data from a pipe to a socket * @pipe: pipe to splice from @@ -836,15 +726,23 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, EXPORT_SYMBOL(generic_splice_sendpage); +static int warn_unsupported(struct file *file, const char *op) +{ + pr_debug_ratelimited( + "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n", + op, file, current->pid, current->comm); + return -EINVAL; +} + /* * Attempt to initiate a splice from pipe to file. */ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - if (out->f_op->splice_write) - return out->f_op->splice_write(pipe, out, ppos, len, flags); - return default_file_splice_write(pipe, out, ppos, len, flags); + if (unlikely(!out->f_op->splice_write)) + return warn_unsupported(out, "write"); + return out->f_op->splice_write(pipe, out, ppos, len, flags); } /* @@ -866,9 +764,9 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(len > MAX_RW_COUNT)) len = MAX_RW_COUNT; - if (in->f_op->splice_read) - return in->f_op->splice_read(in, ppos, pipe, len, flags); - return default_file_splice_read(in, ppos, pipe, len, flags); + if (unlikely(!in->f_op->splice_read)) + return warn_unsupported(in, "read"); + return in->f_op->splice_read(in, ppos, pipe, len, flags); } /** diff --git a/include/linux/fs.h b/include/linux/fs.h index e019ea2f1347..d33cc3e8ed41 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1894,8 +1894,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); -extern ssize_t vfs_readv(struct file *, const struct iovec __user *, - unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3-59-g8ed1b From 4c207ef48269377236cd38979197c5e1631c8c16 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 3 Oct 2020 03:55:22 +0100 Subject: fs: Allow a NULL pos pointer to __kernel_write Linus prefers that callers be allowed to pass in a NULL pointer for ppos like new_sync_write(). Reviewed-by: Christoph Hellwig Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Al Viro --- fs/read_write.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index 8c61f67453e3..516eb51af70e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -544,11 +544,12 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t return warn_unsupported(file, "write"); init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *pos; + kiocb.ki_pos = pos ? *pos : 0; iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); ret = file->f_op->write_iter(&kiocb, &iter); if (ret > 0) { - *pos = kiocb.ki_pos; + if (pos) + *pos = kiocb.ki_pos; fsnotify_modify(file); add_wchar(current, ret); } -- cgit v1.2.3-59-g8ed1b From 7b84b665c874f60d84547635341e418f20cbbab2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 3 Oct 2020 03:55:23 +0100 Subject: fs: Allow a NULL pos pointer to __kernel_read Match the behaviour of new_sync_read() and __kernel_write(). Reviewed-by: Christoph Hellwig Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Al Viro --- fs/read_write.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/read_write.c b/fs/read_write.c index 516eb51af70e..498cc00f3c08 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -449,11 +449,12 @@ ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) return warn_unsupported(file, "read"); init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = *pos; + kiocb.ki_pos = pos ? *pos : 0; iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); ret = file->f_op->read_iter(&kiocb, &iter); if (ret > 0) { - *pos = kiocb.ki_pos; + if (pos) + *pos = kiocb.ki_pos; fsnotify_access(file); add_rchar(current, ret); } -- cgit v1.2.3-59-g8ed1b