From 8f546ae1fc5ce8396827d4868c7eee1f1cc6947a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jan 2018 12:23:05 +0100 Subject: fs: unexport poll_schedule_timeout No users outside of select.c. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Darrick J. Wong --- fs/select.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/select.c') diff --git a/fs/select.c b/fs/select.c index ba879c51288f..a87f396f0313 100644 --- a/fs/select.c +++ b/fs/select.c @@ -233,7 +233,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, add_wait_queue(wait_address, &entry->wait); } -int poll_schedule_timeout(struct poll_wqueues *pwq, int state, +static int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack) { int rc = -EINTR; @@ -258,7 +258,6 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, return rc; } -EXPORT_SYMBOL(poll_schedule_timeout); /** * poll_select_set_timeout - helper function to setup the timeout value -- cgit v1.2.3-59-g8ed1b From a0f8dcfc607d8d113090ff36eee2ee406463bb0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Mar 2018 07:15:25 -0800 Subject: fs: cleanup do_pollfd Use straightline code with failure handling gotos instead of a lot of nested conditionals. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Darrick J. Wong --- fs/select.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'fs/select.c') diff --git a/fs/select.c b/fs/select.c index a87f396f0313..25da26253485 100644 --- a/fs/select.c +++ b/fs/select.c @@ -812,34 +812,32 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, bool *can_busy_poll, __poll_t busy_flag) { - __poll_t mask; - int fd; - - mask = 0; - fd = pollfd->fd; - if (fd >= 0) { - struct fd f = fdget(fd); - mask = EPOLLNVAL; - if (f.file) { - /* userland u16 ->events contains POLL... bitmap */ - __poll_t filter = demangle_poll(pollfd->events) | - EPOLLERR | EPOLLHUP; - mask = DEFAULT_POLLMASK; - if (f.file->f_op->poll) { - pwait->_key = filter; - pwait->_key |= busy_flag; - mask = f.file->f_op->poll(f.file, pwait); - if (mask & busy_flag) - *can_busy_poll = true; - } - /* Mask out unneeded events. */ - mask &= filter; - fdput(f); - } + int fd = pollfd->fd; + __poll_t mask = 0, filter; + struct fd f; + + if (fd < 0) + goto out; + mask = EPOLLNVAL; + f = fdget(fd); + if (!f.file) + goto out; + + /* userland u16 ->events contains POLL... bitmap */ + filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP; + mask = DEFAULT_POLLMASK; + if (f.file->f_op->poll) { + pwait->_key = filter | busy_flag; + mask = f.file->f_op->poll(f.file, pwait); + if (mask & busy_flag) + *can_busy_poll = true; } + mask &= filter; /* Mask out unneeded events. */ + fdput(f); + +out: /* ... and so does ->revents */ pollfd->revents = mangle_poll(mask); - return mask; } -- cgit v1.2.3-59-g8ed1b From 9965ed174e7d38896e5d2582159d8ef31ecd4cb5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Mar 2018 07:26:05 -0800 Subject: fs: add new vfs_poll and file_can_poll helpers These abstract out calls to the poll method in preparation for changes in how we poll. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Darrick J. Wong --- drivers/staging/comedi/drivers/serial2002.c | 4 ++-- drivers/vfio/virqfd.c | 2 +- drivers/vhost/vhost.c | 2 +- fs/eventpoll.c | 5 ++--- fs/select.c | 23 ++++++++--------------- include/linux/poll.h | 12 ++++++++++++ mm/memcontrol.c | 2 +- net/9p/trans_fd.c | 18 ++++-------------- virt/kvm/eventfd.c | 2 +- 9 files changed, 32 insertions(+), 38 deletions(-) (limited to 'fs/select.c') diff --git a/drivers/staging/comedi/drivers/serial2002.c b/drivers/staging/comedi/drivers/serial2002.c index b3f3b4a201af..5471b2212a62 100644 --- a/drivers/staging/comedi/drivers/serial2002.c +++ b/drivers/staging/comedi/drivers/serial2002.c @@ -113,7 +113,7 @@ static void serial2002_tty_read_poll_wait(struct file *f, int timeout) long elapsed; __poll_t mask; - mask = f->f_op->poll(f, &table.pt); + mask = vfs_poll(f, &table.pt); if (mask & (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)) { break; @@ -136,7 +136,7 @@ static int serial2002_tty_read(struct file *f, int timeout) result = -1; if (!IS_ERR(f)) { - if (f->f_op->poll) { + if (file_can_poll(f)) { serial2002_tty_read_poll_wait(f, timeout); if (kernel_read(f, &ch, 1, &pos) == 1) diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 085700f1be10..2a1be859ee71 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -166,7 +166,7 @@ int vfio_virqfd_enable(void *opaque, init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); - events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); + events = vfs_poll(irqfd.file, &virqfd->pt); /* * Check if there was an event already pending on the eventfd diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index f3bd8e941224..f6022881f147 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -208,7 +208,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) if (poll->wqh) return 0; - mask = file->f_op->poll(file, &poll->table); + mask = vfs_poll(file, &poll->table); if (mask) vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); if (mask & EPOLLERR) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 602ca4285b2e..67db22fe99c5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -884,8 +884,7 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, pt->_key = epi->event.events; if (!is_file_epoll(epi->ffd.file)) - return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & - epi->event.events; + return vfs_poll(epi->ffd.file, pt) & epi->event.events; ep = epi->ffd.file->private_data; poll_wait(epi->ffd.file, &ep->poll_wait, pt); @@ -2025,7 +2024,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, /* The target file descriptor must support poll */ error = -EPERM; - if (!tf.file->f_op->poll) + if (!file_can_poll(tf.file)) goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ diff --git a/fs/select.c b/fs/select.c index 25da26253485..e30def680b2e 100644 --- a/fs/select.c +++ b/fs/select.c @@ -502,14 +502,10 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) continue; f = fdget(i); if (f.file) { - const struct file_operations *f_op; - f_op = f.file->f_op; - mask = DEFAULT_POLLMASK; - if (f_op->poll) { - wait_key_set(wait, in, out, - bit, busy_flag); - mask = (*f_op->poll)(f.file, wait); - } + wait_key_set(wait, in, out, bit, + busy_flag); + mask = vfs_poll(f.file, wait); + fdput(f); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; @@ -825,13 +821,10 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, /* userland u16 ->events contains POLL... bitmap */ filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP; - mask = DEFAULT_POLLMASK; - if (f.file->f_op->poll) { - pwait->_key = filter | busy_flag; - mask = f.file->f_op->poll(f.file, pwait); - if (mask & busy_flag) - *can_busy_poll = true; - } + pwait->_key = filter | busy_flag; + mask = vfs_poll(f.file, pwait); + if (mask & busy_flag) + *can_busy_poll = true; mask &= filter; /* Mask out unneeded events. */ fdput(f); diff --git a/include/linux/poll.h b/include/linux/poll.h index a3576da63377..7e0fdcf905d2 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -74,6 +74,18 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) pt->_key = ~(__poll_t)0; /* all events enabled */ } +static inline bool file_can_poll(struct file *file) +{ + return file->f_op->poll; +} + +static inline __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) +{ + if (unlikely(!file->f_op->poll)) + return DEFAULT_POLLMASK; + return file->f_op->poll(file, pt); +} + struct poll_table_entry { struct file *filp; __poll_t key; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2bd3df3d101a..1695f38630f1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3849,7 +3849,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, if (ret) goto out_put_css; - efile.file->f_op->poll(efile.file, &event->pt); + vfs_poll(efile.file, &event->pt); spin_lock(&memcg->event_list_lock); list_add(&event->list, &memcg->event_list); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 0cfba919d167..3811775692d0 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -231,7 +231,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) static __poll_t p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) { - __poll_t ret, n; + __poll_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status == Connected) @@ -243,19 +243,9 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) return EPOLLERR; } - if (!ts->rd->f_op->poll) - ret = DEFAULT_POLLMASK; - else - ret = ts->rd->f_op->poll(ts->rd, pt); - - if (ts->rd != ts->wr) { - if (!ts->wr->f_op->poll) - n = DEFAULT_POLLMASK; - else - n = ts->wr->f_op->poll(ts->wr, pt); - ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN); - } - + ret = vfs_poll(ts->rd, pt); + if (ts->rd != ts->wr) + ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN); return ret; } diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 6e865e8b5b10..90d30fbe95ae 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -397,7 +397,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) * Check if there was an event already pending on the eventfd * before we registered, and trigger it as if we didn't miss it. */ - events = f.file->f_op->poll(f.file, &irqfd->pt); + events = vfs_poll(f.file, &irqfd->pt); if (events & EPOLLIN) schedule_work(&irqfd->inject); -- cgit v1.2.3-59-g8ed1b From 3deb642f0de4c14f37437dd247f9c77839f043f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Jan 2018 15:29:24 +0100 Subject: fs: introduce new ->get_poll_head and ->poll_mask methods ->get_poll_head returns the waitqueue that the poll operation is going to sleep on. Note that this means we can only use a single waitqueue for the poll, unlike some current drivers that use two waitqueues for different events. But now that we have keyed wakeups and heavily use those for poll there aren't that many good reason left to keep the multiple waitqueues, and if there are any ->poll is still around, the driver just won't support aio poll. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Darrick J. Wong --- Documentation/filesystems/Locking | 7 ++++++- Documentation/filesystems/vfs.txt | 13 +++++++++++++ fs/select.c | 23 +++++++++++++++++++++++ include/linux/fs.h | 2 ++ include/linux/poll.h | 12 ++++++------ 5 files changed, 50 insertions(+), 7 deletions(-) (limited to 'fs/select.c') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 220bba28f72b..6d227f9d7bd9 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -440,6 +440,8 @@ prototypes: ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); + struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); + __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -470,7 +472,7 @@ prototypes: }; locking rules: - All may block. + All except for ->poll_mask may block. ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you @@ -498,6 +500,9 @@ in sys_read() and friends. the lease within the individual filesystem to record the result of the operation +->poll_mask can be called with or without the waitqueue lock for the waitqueue +returned from ->get_poll_head. + --------------------------- dquot_operations ------------------------------- prototypes: int (*write_dquot) (struct dquot *); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index f608180ad59d..829a7b7857a4 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -857,6 +857,8 @@ struct file_operations { ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); + struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); + __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -901,6 +903,17 @@ otherwise noted. activity on this file and (optionally) go to sleep until there is activity. Called by the select(2) and poll(2) system calls + get_poll_head: Returns the struct wait_queue_head that callers can + wait on. Callers need to check the returned events using ->poll_mask + once woken. Can return NULL to indicate polling is not supported, + or any error code using the ERR_PTR convention to indicate that a + grave error occured and ->poll_mask shall not be called. + + poll_mask: return the mask of EPOLL* values describing the file descriptor + state. Called either before going to sleep on the waitqueue returned by + get_poll_head, or after it has been woken. If ->get_poll_head and + ->poll_mask are implemented ->poll does not need to be implement. + unlocked_ioctl: called by the ioctl(2) system call. compat_ioctl: called by the ioctl(2) system call when 32 bit system calls diff --git a/fs/select.c b/fs/select.c index e30def680b2e..bc3cc0f98896 100644 --- a/fs/select.c +++ b/fs/select.c @@ -34,6 +34,29 @@ #include +__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) +{ + if (file->f_op->poll) { + return file->f_op->poll(file, pt); + } else if (file_has_poll_mask(file)) { + unsigned int events = poll_requested_events(pt); + struct wait_queue_head *head; + + if (pt && pt->_qproc) { + head = file->f_op->get_poll_head(file, events); + if (!head) + return DEFAULT_POLLMASK; + if (IS_ERR(head)) + return EPOLLERR; + pt->_qproc(file, head, pt); + } + + return file->f_op->poll_mask(file, events); + } else { + return DEFAULT_POLLMASK; + } +} +EXPORT_SYMBOL_GPL(vfs_poll); /* * Estimate expected accuracy in ns from a timeval. diff --git a/include/linux/fs.h b/include/linux/fs.h index 760d8da1b6c7..4b6045ebb2f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1711,6 +1711,8 @@ struct file_operations { int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); + struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); + __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); diff --git a/include/linux/poll.h b/include/linux/poll.h index 7e0fdcf905d2..fdf86b4cbc71 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -74,18 +74,18 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) pt->_key = ~(__poll_t)0; /* all events enabled */ } -static inline bool file_can_poll(struct file *file) +static inline bool file_has_poll_mask(struct file *file) { - return file->f_op->poll; + return file->f_op->get_poll_head && file->f_op->poll_mask; } -static inline __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) +static inline bool file_can_poll(struct file *file) { - if (unlikely(!file->f_op->poll)) - return DEFAULT_POLLMASK; - return file->f_op->poll(file, pt); + return file->f_op->poll || file_has_poll_mask(file); } +__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt); + struct poll_table_entry { struct file *filp; __poll_t key; -- cgit v1.2.3-59-g8ed1b