aboutsummaryrefslogtreecommitdiffstats
path: root/fs/eventpoll.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--fs/eventpoll.c34
1 files changed, 32 insertions, 2 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 06f4c5ae1451..52954d4637b5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -307,7 +307,7 @@ static void unlist_file(struct epitems_head *head)
static long long_zero;
static long long_max = LONG_MAX;
-struct ctl_table epoll_table[] = {
+static struct ctl_table epoll_table[] = {
{
.procname = "max_user_watches",
.data = &max_user_watches,
@@ -319,6 +319,13 @@ struct ctl_table epoll_table[] = {
},
{ }
};
+
+static void __init epoll_sysctls_init(void)
+{
+ register_sysctl("fs/epoll", epoll_table);
+}
+#else
+#define epoll_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static const struct file_operations eventpoll_fops;
@@ -1058,7 +1065,7 @@ static inline bool list_add_tail_lockless(struct list_head *new,
* added to the list from another CPU: the winner observes
* new->next == new.
*/
- if (cmpxchg(&new->next, new, head) != new)
+ if (!try_cmpxchg(&new->next, &new, head))
return false;
/*
@@ -1740,6 +1747,21 @@ static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms)
return to;
}
+/*
+ * autoremove_wake_function, but remove even on failure to wake up, because we
+ * know that default_wake_function/ttwu will only fail if the thread is already
+ * woken, and in that case the ep_poll loop will remove the entry anyways, not
+ * try to reuse it.
+ */
+static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
+ unsigned int mode, int sync, void *key)
+{
+ int ret = default_wake_function(wq_entry, mode, sync, key);
+
+ list_del_init(&wq_entry->entry);
+ return ret;
+}
+
/**
* ep_poll - Retrieves ready events, and delivers them to the caller-supplied
* event buffer.
@@ -1821,8 +1843,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
* normal wakeup path no need to call __remove_wait_queue()
* explicitly, thus ep->lock is not taken, which halts the
* event delivery.
+ *
+ * In fact, we now use an even more aggressive function that
+ * unconditionally removes, because we don't reuse the wait
+ * entry between loop iterations. This lets us also avoid the
+ * performance issue if a process is killed, causing all of its
+ * threads to wake up without being removed normally.
*/
init_wait(&wait);
+ wait.func = ep_autoremove_wake_function;
write_lock_irq(&ep->lock);
/*
@@ -2378,6 +2407,7 @@ static int __init eventpoll_init(void)
/* Allocates slab cache used to allocate "struct eppoll_entry" */
pwq_cache = kmem_cache_create("eventpoll_pwq",
sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
+ epoll_sysctls_init();
ephead_cache = kmem_cache_create("ep_head",
sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);