aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/fs/eventpoll.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--fs/eventpoll.c56
1 files changed, 50 insertions, 6 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1e009cad8d5c..8a74a2a52e0f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -92,7 +92,12 @@
*/
/* Epoll private bits inside the event mask */
-#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET)
+#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
+
+#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
+
+#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
+ EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
/* Maximum number of nesting allowed inside epoll sets */
#define EP_MAX_NESTS 4
@@ -1002,6 +1007,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
unsigned long flags;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
+ int ewake = 0;
if ((unsigned long)key & POLLFREE) {
ep_pwq_from_wait(wait)->whead = NULL;
@@ -1066,8 +1072,25 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
* wait list.
*/
- if (waitqueue_active(&ep->wq))
+ if (waitqueue_active(&ep->wq)) {
+ if ((epi->event.events & EPOLLEXCLUSIVE) &&
+ !((unsigned long)key & POLLFREE)) {
+ switch ((unsigned long)key & EPOLLINOUT_BITS) {
+ case POLLIN:
+ if (epi->event.events & POLLIN)
+ ewake = 1;
+ break;
+ case POLLOUT:
+ if (epi->event.events & POLLOUT)
+ ewake = 1;
+ break;
+ case 0:
+ ewake = 1;
+ break;
+ }
+ }
wake_up_locked(&ep->wq);
+ }
if (waitqueue_active(&ep->poll_wait))
pwake++;
@@ -1078,6 +1101,9 @@ out_unlock:
if (pwake)
ep_poll_safewake(&ep->poll_wait);
+ if (epi->event.events & EPOLLEXCLUSIVE)
+ return ewake;
+
return 1;
}
@@ -1095,7 +1121,10 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
pwq->whead = whead;
pwq->base = epi;
- add_wait_queue(whead, &pwq->wait);
+ if (epi->event.events & EPOLLEXCLUSIVE)
+ add_wait_queue_exclusive(whead, &pwq->wait);
+ else
+ add_wait_queue(whead, &pwq->wait);
list_add_tail(&pwq->llink, &epi->pwqlist);
epi->nwait++;
} else {
@@ -1587,7 +1616,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
unsigned long flags;
- long slack = 0;
+ u64 slack = 0;
wait_queue_t wait;
ktime_t expires, *to = NULL;
@@ -1862,6 +1891,19 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
goto error_tgt_fput;
/*
+ * epoll adds to the wakeup queue at EPOLL_CTL_ADD time only,
+ * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
+ * Also, we do not currently supported nested exclusive wakeups.
+ */
+ if (epds.events & EPOLLEXCLUSIVE) {
+ if (op == EPOLL_CTL_MOD)
+ goto error_tgt_fput;
+ if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
+ (epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+ goto error_tgt_fput;
+ }
+
+ /*
* At this point it is safe to assume that the "private_data" contains
* our own data structure.
*/
@@ -1932,8 +1974,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
break;
case EPOLL_CTL_MOD:
if (epi) {
- epds.events |= POLLERR | POLLHUP;
- error = ep_modify(ep, epi, &epds);
+ if (!(epi->event.events & EPOLLEXCLUSIVE)) {
+ epds.events |= POLLERR | POLLHUP;
+ error = ep_modify(ep, epi, &epds);
+ }
} else
error = -ENOENT;
break;