summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author mpi <mpi@openbsd.org> 2017-11-04 14:13:53 +0000
committer mpi <mpi@openbsd.org> 2017-11-04 14:13:53 +0000
commit 290a836d36de7cb20c274caa66f9cbc92c591330 (patch)
tree 8e7a6ab55184bcee638aac902590edc4b82cbed3
parent fuse_loop_mt() isn't implemented so return an error value. (diff)
download wireguard-openbsd-290a836d36de7cb20c274caa66f9cbc92c591330.tar.xz
wireguard-openbsd-290a836d36de7cb20c274caa66f9cbc92c591330.zip
Make it possible for multiple threads to enter kqueue_scan() in parallel.
This is a requirement for using a sleeping lock inside kqueue filters. It is now possible, but not recommended, to sleep inside ``f_event''. Threads iterating over the list of pending events now recognize and skip other threads' markers. knote_acquire() and knote_release() must be used to "own" a knote, to make sure no other thread is sleeping with a reference on it. The acquire and marker logic is taken from DragonFly, but the KERNEL_LOCK() still serializes the execution of the kqueue code. This also enables the NET_LOCK() in socket filters. Tested by abieber@ & juanfra@, run by naddy@ in a bulk, ok visa@, bluhm@
-rw-r--r-- sys/kern/kern_event.c 61
-rw-r--r-- sys/kern/uipc_socket.c 19
-rw-r--r-- sys/miscfs/fifofs/fifo_vnops.c 14
-rw-r--r-- sys/sys/event.h 26
-rw-r--r-- sys/sys/socketvar.h 5
5 files changed, 101 insertions, 24 deletions
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index e2de0aabbf8..b13feef56a4 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_event.c,v 1.81 2017/10/11 08:06:56 mpi Exp $ */
+/* $OpenBSD: kern_event.c,v 1.82 2017/11/04 14:13:53 mpi Exp $ */
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
@@ -84,6 +84,8 @@ void knote_attach(struct knote *kn, struct filedesc *fdp);
void knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp);
void knote_enqueue(struct knote *kn);
void knote_dequeue(struct knote *kn);
+int knote_acquire(struct knote *kn);
+int knote_release(struct knote *kn);
#define knote_alloc() ((struct knote *)pool_get(&knote_pool, PR_WAITOK))
#define knote_free(kn) pool_put(&knote_pool, (kn))
@@ -759,27 +761,43 @@ start:
goto done;
}
+ marker.kn_filter = EVFILT_MARKER;
+ marker.kn_status = KN_PROCESSING;
TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
while (count) {
kn = TAILQ_FIRST(&kq->kq_head);
if (kn == &marker) {
- TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+ TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
splx(s);
if (count == maxevents)
goto retry;
goto done;
}
+ if (kn->kn_filter == EVFILT_MARKER) {
+ struct knote *other_marker = kn;
+
+ /* Move some other threads marker past this kn */
+ kn = TAILQ_NEXT(other_marker, kn_tqe);
+ TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+ TAILQ_INSERT_BEFORE(other_marker, kn, kn_tqe);
+ continue;
+ }
+
+ if (!knote_acquire(kn))
+ continue;
TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
kq->kq_count--;
if (kn->kn_status & KN_DISABLED) {
kn->kn_status &= ~KN_QUEUED;
+ knote_release(kn);
continue;
}
if ((kn->kn_flags & EV_ONESHOT) == 0 &&
kn->kn_fop->f_event(kn, 0) == 0) {
kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
+ knote_release(kn);
continue;
}
*kevp = kn->kn_kevent;
@@ -799,9 +817,11 @@ start:
if (kn->kn_flags & EV_DISPATCH)
kn->kn_status |= KN_DISABLED;
kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
+ knote_release(kn);
} else {
TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
kq->kq_count++;
+ knote_release(kn);
}
count--;
if (nkev == KQ_NEVENTS) {
@@ -956,6 +976,41 @@ kqueue_wakeup(struct kqueue *kq)
}
/*
+ * Acquire a knote, return non-zero on success, 0 on failure.
+ *
+ * If we cannot acquire the knote we sleep and return 0. The knote
+ * may be stale on return in this case and the caller must restart
+ * whatever loop they are in.
+ */
+int
+knote_acquire(struct knote *kn)
+{
+ if (kn->kn_status & KN_PROCESSING) {
+ kn->kn_status |= KN_WAITING;
+ tsleep(kn, 0, "kqepts", hz);
+ /* knote may be stale now */
+ return (0);
+ }
+ kn->kn_status |= KN_PROCESSING;
+ return (1);
+}
+
+/*
+ * Release an acquired knote, clearing KN_PROCESSING.
+ */
+int
+knote_release(struct knote *kn)
+{
+ if (kn->kn_status & KN_WAITING) {
+ kn->kn_status &= ~KN_WAITING;
+ wakeup(kn);
+ }
+ kn->kn_status &= ~KN_PROCESSING;
+ /* kn should not be accessed anymore */
+ return (0);
+}
+
+/*
* activate one knote.
*/
void
@@ -986,6 +1041,8 @@ knote_remove(struct proc *p, struct klist *list)
struct knote *kn;
while ((kn = SLIST_FIRST(list)) != NULL) {
+ if (!knote_acquire(kn))
+ continue;
kn->kn_fop->f_detach(kn);
knote_drop(kn, p, p->p_fd);
}
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index ce240b62fe6..92efb46fd6e 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_socket.c,v 1.206 2017/11/02 14:01:18 florian Exp $ */
+/* $OpenBSD: uipc_socket.c,v 1.207 2017/11/04 14:13:53 mpi Exp $ */
/* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
/*
@@ -1922,8 +1922,10 @@ int
filt_soread(struct knote *kn, long hint)
{
struct socket *so = kn->kn_fp->f_data;
- int rv;
+ int s, rv;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
if (isspliced(so)) {
@@ -1941,6 +1943,8 @@ filt_soread(struct knote *kn, long hint)
} else {
rv = (kn->kn_data >= so->so_rcv.sb_lowat);
}
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return rv;
}
@@ -1961,8 +1965,10 @@ int
filt_sowrite(struct knote *kn, long hint)
{
struct socket *so = kn->kn_fp->f_data;
- int rv;
+ int s, rv;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = sbspace(so, &so->so_snd);
if (so->so_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
@@ -1978,6 +1984,8 @@ filt_sowrite(struct knote *kn, long hint)
} else {
rv = (kn->kn_data >= so->so_snd.sb_lowat);
}
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return (rv);
}
@@ -1986,8 +1994,13 @@ int
filt_solisten(struct knote *kn, long hint)
{
struct socket *so = kn->kn_fp->f_data;
+ int s;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = so->so_qlen;
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return (kn->kn_data != 0);
}
diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c
index 7502183f931..0f2077538a3 100644
--- a/sys/miscfs/fifofs/fifo_vnops.c
+++ b/sys/miscfs/fifofs/fifo_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: fifo_vnops.c,v 1.58 2017/07/24 15:07:39 mpi Exp $ */
+/* $OpenBSD: fifo_vnops.c,v 1.59 2017/11/04 14:13:53 mpi Exp $ */
/* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */
/*
@@ -545,8 +545,10 @@ int
filt_fiforead(struct knote *kn, long hint)
{
struct socket *so = (struct socket *)kn->kn_hook;
- int rv;
+ int s, rv;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = so->so_rcv.sb_cc;
if (so->so_state & SS_CANTRCVMORE) {
kn->kn_flags |= EV_EOF;
@@ -555,6 +557,8 @@ filt_fiforead(struct knote *kn, long hint)
kn->kn_flags &= ~EV_EOF;
rv = (kn->kn_data > 0);
}
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return (rv);
}
@@ -573,8 +577,10 @@ int
filt_fifowrite(struct knote *kn, long hint)
{
struct socket *so = (struct socket *)kn->kn_hook;
- int rv;
+ int s, rv;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = sbspace(so, &so->so_snd);
if (so->so_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
@@ -583,6 +589,8 @@ filt_fifowrite(struct knote *kn, long hint)
kn->kn_flags &= ~EV_EOF;
rv = (kn->kn_data >= so->so_snd.sb_lowat);
}
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return (rv);
}
diff --git a/sys/sys/event.h b/sys/sys/event.h
index 6c3de0b5a6b..0bcf648b60c 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: event.h,v 1.26 2017/06/26 09:32:32 mpi Exp $ */
+/* $OpenBSD: event.h,v 1.27 2017/11/04 14:13:53 mpi Exp $ */
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
@@ -80,13 +80,6 @@ struct kevent {
#define EV_ERROR 0x4000 /* error, data contains errno */
/*
- * hint flag for in-kernel use - must not equal any existing note
- */
-#ifdef _KERNEL
-#define NOTE_SUBMIT 0x01000000 /* initial knote submission */
-#endif
-
-/*
* data/hint flags for EVFILT_{READ|WRITE}, shared with userspace
*/
#define NOTE_LOWAT 0x0001 /* low water mark */
@@ -128,6 +121,13 @@ SLIST_HEAD(klist, knote);
#ifdef _KERNEL
+#define EVFILT_MARKER 0xF /* placemarker for tailq */
+
+/*
+ * hint flag for in-kernel use - must not equal any existing note
+ */
+#define NOTE_SUBMIT 0x01000000 /* initial knote submission */
+
#define KNOTE(list_, hint) do { \
struct klist *list = (list_); \
if ((list) != NULL) \
@@ -164,10 +164,12 @@ struct knote {
} kn_ptr;
const struct filterops *kn_fop;
void *kn_hook;
-#define KN_ACTIVE 0x01 /* event has been triggered */
-#define KN_QUEUED 0x02 /* event is on queue */
-#define KN_DISABLED 0x04 /* event is disabled */
-#define KN_DETACHED 0x08 /* knote is detached */
+#define KN_ACTIVE 0x0001 /* event has been triggered */
+#define KN_QUEUED 0x0002 /* event is on queue */
+#define KN_DISABLED 0x0004 /* event is disabled */
+#define KN_DETACHED 0x0008 /* knote is detached */
+#define KN_PROCESSING 0x0010 /* event processing in prog */
+#define KN_WAITING 0x0020 /* waiting on processing */
#define kn_id kn_kevent.ident
#define kn_filter kn_kevent.filter
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 8104d82c629..f4b54bce2c1 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: socketvar.h,v 1.76 2017/09/01 15:05:31 mpi Exp $ */
+/* $OpenBSD: socketvar.h,v 1.77 2017/11/04 14:13:53 mpi Exp $ */
/* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */
/*-
@@ -186,10 +186,7 @@ static inline long
sbspace(struct socket *so, struct sockbuf *sb)
{
KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
-#if 0
- /* XXXSMP kqueue_scan() calling filt_sowrite() cannot sleep. */
soassertlocked(so);
-#endif
return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
}