summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorcheloha <cheloha@openbsd.org>2019-03-25 23:32:00 +0000
committercheloha <cheloha@openbsd.org>2019-03-25 23:32:00 +0000
commitaf3eeb45896d4447c633a9599e8da254391a9862 (patch)
treed06c756d1c0867037df216f672df03c22f09a7ec
parentFix authentication failures when "AuthenticationMethods any" in a (diff)
downloadwireguard-openbsd-af3eeb45896d4447c633a9599e8da254391a9862.tar.xz
wireguard-openbsd-af3eeb45896d4447c633a9599e8da254391a9862.zip
MP-safe timecounting: new rwlock: tc_lock
tc_lock allows adjfreq(2) and the kern.timecounter.hardware sysctl(2) to read/write the active timecounter pointer and the .tc_adj_freq member of the active timecounter safely. This eliminates any possibility of a torn read/write for the .tc_adj_freq member when we drop the KERNEL_LOCK from the timecounting layer. It also ensures the active timecounter does not change in the midst of an adjfreq(2) call. Because these are not high-traffic paths, we can get away with using tc_lock in write-mode to ensure combination read/write adjtime(2) calls are relatively atomic (a) to other writer adjtime(2) calls, and (b) to settimeofday(2)/clock_settime(2) calls, which cancel ongoing adjtime(2) adjustment. When the KERNEL_LOCK is dropped, an unprivileged user will be able to create some tc_lock contention via adjfreq(2); it is very unlikely to ever be a problem. If it ever is actually a problem a lockless read could be added to address it. While here, reorganize sys_adjfreq()/sys_adjtime() to minimize code under the lock. Also while here, make tc_adjfreq() void, as it cannot fail under any circumstance. Also also while here, annotate various globals/struct members with lock ordering details. With lots of input from mpi@ and visa@. ok visa@
-rw-r--r--sys/kern/kern_tc.c56
-rw-r--r--sys/kern/kern_time.c61
-rw-r--r--sys/sys/timetc.h32
3 files changed, 94 insertions, 55 deletions
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 0e6abe174fe..2e426238564 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_tc.c,v 1.42 2019/03/22 18:27:12 cheloha Exp $ */
+/* $OpenBSD: kern_tc.c,v 1.43 2019/03/25 23:32:00 cheloha Exp $ */
/*
* Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
@@ -25,6 +25,7 @@
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
+#include <sys/rwlock.h>
#include <sys/timeout.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -64,20 +65,27 @@ static struct timecounter dummy_timecounter = {
dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};
+/*
+ * Locks used to protect struct members, global variables in this file:
+ * I immutable after initialization
+ * t tc_lock
+ * w windup_mtx
+ */
+
struct timehands {
/* These fields must be initialized by the driver. */
- struct timecounter *th_counter;
- int64_t th_adjtimedelta;
- int64_t th_adjustment;
- u_int64_t th_scale;
- u_int th_offset_count;
- struct bintime th_boottime;
- struct bintime th_offset;
- struct timeval th_microtime;
- struct timespec th_nanotime;
+ struct timecounter *th_counter; /* [w] */
+ int64_t th_adjtimedelta; /* [tw] */
+ int64_t th_adjustment; /* [w] */
+ u_int64_t th_scale; /* [w] */
+ u_int th_offset_count; /* [w] */
+ struct bintime th_boottime; /* [tw] */
+ struct bintime th_offset; /* [w] */
+ struct timeval th_microtime; /* [w] */
+ struct timespec th_nanotime; /* [w] */
/* Fields not to be copied in tc_windup start with th_generation. */
- volatile u_int th_generation;
- struct timehands *th_next;
+ volatile u_int th_generation; /* [w] */
+ struct timehands *th_next; /* [I] */
};
static struct timehands th0;
@@ -104,15 +112,15 @@ static struct timehands th0 = {
&th1
};
+struct rwlock tc_lock = RWLOCK_INITIALIZER("tc_lock");
+
/*
- * Serializes writes to the members of the next active timehands.
- *
* tc_windup() must be called before leaving this mutex.
*/
struct mutex windup_mtx = MUTEX_INITIALIZER(IPL_CLOCK);
-static struct timehands *volatile timehands = &th0;
-struct timecounter *timecounter = &dummy_timecounter;
+static struct timehands *volatile timehands = &th0; /* [w] */
+struct timecounter *timecounter = &dummy_timecounter; /* [t] */
static struct timecounter *timecounters = &dummy_timecounter;
volatile time_t time_second = 1;
@@ -360,6 +368,7 @@ tc_setrealtimeclock(const struct timespec *ts)
struct bintime bt, bt2;
int64_t zero = 0;
+ rw_enter_write(&tc_lock);
mtx_enter(&windup_mtx);
binuptime(&bt2);
timespec2bintime(ts, &bt);
@@ -369,6 +378,7 @@ tc_setrealtimeclock(const struct timespec *ts)
/* XXX fiddle all the little crinkly bits around the fiords... */
tc_windup(&bt, NULL, &zero);
mtx_leave(&windup_mtx);
+ rw_exit_write(&tc_lock);
enqueue_randomness(ts->tv_sec);
@@ -459,6 +469,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
u_int delta, ncount, ogen;
int i;
+ if (new_boottime != NULL || new_adjtimedelta != NULL)
+ rw_assert_wrlock(&tc_lock);
MUTEX_ASSERT_LOCKED(&windup_mtx);
active_tc = timecounter;
@@ -612,7 +624,10 @@ sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
(void)newtc->tc_get_timecount(newtc);
(void)newtc->tc_get_timecount(newtc);
+ rw_enter_write(&tc_lock);
timecounter = newtc;
+ rw_exit_write(&tc_lock);
+
return (0);
}
return (EINVAL);
@@ -739,16 +754,20 @@ ntp_update_second(struct timehands *th)
th->th_adjustment += th->th_counter->tc_freq_adj;
}
-int
+void
tc_adjfreq(int64_t *old, int64_t *new)
{
if (old != NULL) {
+ rw_assert_anylock(&tc_lock);
*old = timecounter->tc_freq_adj;
}
if (new != NULL) {
+ rw_assert_wrlock(&tc_lock);
+ mtx_enter(&windup_mtx);
timecounter->tc_freq_adj = *new;
+ tc_windup(NULL, NULL, NULL);
+ mtx_leave(&windup_mtx);
}
- return 0;
}
void
@@ -767,6 +786,7 @@ tc_adjtime(int64_t *old, int64_t *new)
} while (gen == 0 || gen != th->th_generation);
}
if (new != NULL) {
+ rw_assert_wrlock(&tc_lock);
mtx_enter(&windup_mtx);
tc_windup(NULL, NULL, new);
mtx_leave(&windup_mtx);
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index 5dc32a29b14..b14bbb05246 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_time.c,v 1.111 2019/03/10 21:16:15 cheloha Exp $ */
+/* $OpenBSD: kern_time.c,v 1.112 2019/03/25 23:32:00 cheloha Exp $ */
/* $NetBSD: kern_time.c,v 1.20 1996/02/18 11:57:06 fvdl Exp $ */
/*
@@ -36,6 +36,7 @@
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/systm.h>
+#include <sys/rwlock.h>
#include <sys/proc.h>
#include <sys/ktrace.h>
#include <sys/vnode.h>
@@ -389,21 +390,25 @@ sys_adjfreq(struct proc *p, void *v, register_t *retval)
int64_t f;
const int64_t *freq = SCARG(uap, freq);
int64_t *oldfreq = SCARG(uap, oldfreq);
- if (oldfreq) {
- if ((error = tc_adjfreq(&f, NULL)))
- return (error);
- if ((error = copyout(&f, oldfreq, sizeof(f))))
- return (error);
- }
+
if (freq) {
if ((error = suser(p)))
return (error);
if ((error = copyin(freq, &f, sizeof(f))))
return (error);
- if ((error = tc_adjfreq(NULL, &f)))
- return (error);
}
- return (0);
+
+ rw_enter(&tc_lock, (freq == NULL) ? RW_READ : RW_WRITE);
+ if (oldfreq) {
+ tc_adjfreq(&f, NULL);
+ if ((error = copyout(&f, oldfreq, sizeof(f))))
+ goto out;
+ }
+ if (freq)
+ tc_adjfreq(NULL, &f);
+out:
+ rw_exit(&tc_lock);
+ return (error);
}
int
@@ -423,6 +428,20 @@ sys_adjtime(struct proc *p, void *v, register_t *retval)
if (error)
return error;
+ if (delta) {
+ if ((error = suser(p)))
+ return (error);
+ if ((error = copyin(delta, &atv, sizeof(struct timeval))))
+ return (error);
+ if (!timerisvalid(&atv))
+ return (EINVAL);
+
+ /* XXX Check for overflow? */
+ adjustment = (int64_t)atv.tv_sec * 1000000 + atv.tv_usec;
+
+ rw_enter_write(&tc_lock);
+ }
+
if (olddelta) {
tc_adjtime(&remaining, NULL);
memset(&atv, 0, sizeof(atv));
@@ -434,25 +453,15 @@ sys_adjtime(struct proc *p, void *v, register_t *retval)
}
if ((error = copyout(&atv, olddelta, sizeof(struct timeval))))
- return (error);
+ goto out;
}
- if (delta) {
- if ((error = suser(p)))
- return (error);
-
- if ((error = copyin(delta, &atv, sizeof(struct timeval))))
- return (error);
-
- if (!timerisvalid(&atv))
- return (EINVAL);
-
- /* XXX Check for overflow? */
- adjustment = (int64_t)atv.tv_sec * 1000000 + atv.tv_usec;
+ if (delta)
tc_adjtime(NULL, &adjustment);
- }
-
- return (0);
+out:
+ if (delta)
+ rw_exit_write(&tc_lock);
+ return (error);
}
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
index 0be79eea736..7340a60b89a 100644
--- a/sys/sys/timetc.h
+++ b/sys/sys/timetc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: timetc.h,v 1.7 2019/03/10 21:16:15 cheloha Exp $ */
+/* $OpenBSD: timetc.h,v 1.8 2019/03/25 23:32:00 cheloha Exp $ */
/*
* Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
@@ -43,40 +43,50 @@ struct timecounter;
typedef u_int timecounter_get_t(struct timecounter *);
typedef void timecounter_pps_t(struct timecounter *);
+/*
+ * Locks used to protect struct members in this file:
+ * I immutable after initialization
+ * t tc_lock
+ * w windup_mtx
+ */
+
struct timecounter {
- timecounter_get_t *tc_get_timecount;
+ timecounter_get_t *tc_get_timecount; /* [I] */
/*
* This function reads the counter. It is not required to
* mask any unimplemented bits out, as long as they are
* constant.
*/
- timecounter_pps_t *tc_poll_pps;
+ timecounter_pps_t *tc_poll_pps; /* [I] */
/*
* This function is optional. It will be called whenever the
* timecounter is rewound, and is intended to check for PPS
* events. Normal hardware does not need it but timecounters
* which latch PPS in hardware (like sys/pci/xrpu.c) do.
*/
- u_int tc_counter_mask;
+ u_int tc_counter_mask; /* [I] */
/* This mask should mask off any unimplemented bits. */
- u_int64_t tc_frequency;
+ u_int64_t tc_frequency; /* [I] */
/* Frequency of the counter in Hz. */
- char *tc_name;
+ char *tc_name; /* [I] */
/* Name of the timecounter. */
- int tc_quality;
+ int tc_quality; /* [I] */
/*
* Used to determine if this timecounter is better than
* another timecounter higher means better. Negative
* means "only use at explicit request".
*/
- void *tc_priv;
+ void *tc_priv; /* [I] */
/* Pointer to the timecounter's private parts. */
- struct timecounter *tc_next;
+ struct timecounter *tc_next; /* [I] */
/* Pointer to the next timecounter. */
- int64_t tc_freq_adj;
+ int64_t tc_freq_adj; /* [tw] */
/* Current frequency adjustment. */
};
+struct rwlock;
+extern struct rwlock tc_lock;
+
extern struct timecounter *timecounter;
u_int64_t tc_getfrequency(void);
@@ -86,7 +96,7 @@ void tc_setrealtimeclock(const struct timespec *ts);
void tc_ticktock(void);
void inittimecounter(void);
int sysctl_tc(int *, u_int, void *, size_t *, void *, size_t);
-int tc_adjfreq(int64_t *, int64_t *);
+void tc_adjfreq(int64_t *, int64_t *);
void tc_adjtime(int64_t *, int64_t *);
#endif /* !_SYS_TIMETC_H_ */