diff options
author | 2019-03-25 23:32:00 +0000 | |
---|---|---|
committer | 2019-03-25 23:32:00 +0000 | |
commit | af3eeb45896d4447c633a9599e8da254391a9862 (patch) | |
tree | d06c756d1c0867037df216f672df03c22f09a7ec | |
parent | Fix authentication failures when "AuthenticationMethods any" in a (diff) | |
download | wireguard-openbsd-af3eeb45896d4447c633a9599e8da254391a9862.tar.xz wireguard-openbsd-af3eeb45896d4447c633a9599e8da254391a9862.zip |
MP-safe timecounting: new rwlock: tc_lock
tc_lock allows adjfreq(2) and the kern.timecounter.hardware sysctl(2)
to read/write the active timecounter pointer and the .tc_adj_freq
member of the active timecounter safely. This eliminates any possibility
of a torn read/write for the .tc_adj_freq member when we drop the
KERNEL_LOCK from the timecounting layer. It also ensures the active
timecounter does not change in the midst of an adjfreq(2) call.
Because these are not high-traffic paths, we can get away with using
tc_lock in write-mode to ensure combination read/write adjtime(2) calls
are relatively atomic (a) to other writer adjtime(2) calls, and (b) to
settimeofday(2)/clock_settime(2) calls, which cancel ongoing adjtime(2)
adjustment.
When the KERNEL_LOCK is dropped, an unprivileged user will be able to
create some tc_lock contention via adjfreq(2); it is very unlikely to
ever be a problem. If it ever is actually a problem a lockless read
could be added to address it.
While here, reorganize sys_adjfreq()/sys_adjtime() to minimize code
under the lock. Also while here, make tc_adjfreq() void, as it cannot
fail under any circumstance. Also also while here, annotate various
globals/struct members with lock ordering details.
With lots of input from mpi@ and visa@.
ok visa@
-rw-r--r-- | sys/kern/kern_tc.c | 56 | ||||
-rw-r--r-- | sys/kern/kern_time.c | 61 | ||||
-rw-r--r-- | sys/sys/timetc.h | 32 |
3 files changed, 94 insertions, 55 deletions
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index 0e6abe174fe..2e426238564 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_tc.c,v 1.42 2019/03/22 18:27:12 cheloha Exp $ */ +/* $OpenBSD: kern_tc.c,v 1.43 2019/03/25 23:32:00 cheloha Exp $ */ /* * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org> @@ -25,6 +25,7 @@ #include <sys/atomic.h> #include <sys/kernel.h> #include <sys/mutex.h> +#include <sys/rwlock.h> #include <sys/timeout.h> #include <sys/sysctl.h> #include <sys/syslog.h> @@ -64,20 +65,27 @@ static struct timecounter dummy_timecounter = { dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 }; +/* + * Locks used to protect struct members, global variables in this file: + * I immutable after initialization + * t tc_lock + * w windup_mtx + */ + struct timehands { /* These fields must be initialized by the driver. */ - struct timecounter *th_counter; - int64_t th_adjtimedelta; - int64_t th_adjustment; - u_int64_t th_scale; - u_int th_offset_count; - struct bintime th_boottime; - struct bintime th_offset; - struct timeval th_microtime; - struct timespec th_nanotime; + struct timecounter *th_counter; /* [w] */ + int64_t th_adjtimedelta; /* [tw] */ + int64_t th_adjustment; /* [w] */ + u_int64_t th_scale; /* [w] */ + u_int th_offset_count; /* [w] */ + struct bintime th_boottime; /* [tw] */ + struct bintime th_offset; /* [w] */ + struct timeval th_microtime; /* [w] */ + struct timespec th_nanotime; /* [w] */ /* Fields not to be copied in tc_windup start with th_generation. */ - volatile u_int th_generation; - struct timehands *th_next; + volatile u_int th_generation; /* [w] */ + struct timehands *th_next; /* [I] */ }; static struct timehands th0; @@ -104,15 +112,15 @@ static struct timehands th0 = { &th1 }; +struct rwlock tc_lock = RWLOCK_INITIALIZER("tc_lock"); + /* - * Serializes writes to the members of the next active timehands. - * * tc_windup() must be called before leaving this mutex. */ struct mutex windup_mtx = MUTEX_INITIALIZER(IPL_CLOCK); -static struct timehands *volatile timehands = &th0; -struct timecounter *timecounter = &dummy_timecounter; +static struct timehands *volatile timehands = &th0; /* [w] */ +struct timecounter *timecounter = &dummy_timecounter; /* [t] */ static struct timecounter *timecounters = &dummy_timecounter; volatile time_t time_second = 1; @@ -360,6 +368,7 @@ tc_setrealtimeclock(const struct timespec *ts) struct bintime bt, bt2; int64_t zero = 0; + rw_enter_write(&tc_lock); mtx_enter(&windup_mtx); binuptime(&bt2); timespec2bintime(ts, &bt); @@ -369,6 +378,7 @@ tc_setrealtimeclock(const struct timespec *ts) /* XXX fiddle all the little crinkly bits around the fiords... */ tc_windup(&bt, NULL, &zero); mtx_leave(&windup_mtx); + rw_exit_write(&tc_lock); enqueue_randomness(ts->tv_sec); @@ -459,6 +469,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset, u_int delta, ncount, ogen; int i; + if (new_boottime != NULL || new_adjtimedelta != NULL) + rw_assert_wrlock(&tc_lock); MUTEX_ASSERT_LOCKED(&windup_mtx); active_tc = timecounter; @@ -612,7 +624,10 @@ sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) (void)newtc->tc_get_timecount(newtc); (void)newtc->tc_get_timecount(newtc); + rw_enter_write(&tc_lock); timecounter = newtc; + rw_exit_write(&tc_lock); + return (0); } return (EINVAL); @@ -739,16 +754,20 @@ ntp_update_second(struct timehands *th) th->th_adjustment += th->th_counter->tc_freq_adj; } -int +void tc_adjfreq(int64_t *old, int64_t *new) { if (old != NULL) { + rw_assert_anylock(&tc_lock); *old = timecounter->tc_freq_adj; } if (new != NULL) { + rw_assert_wrlock(&tc_lock); + mtx_enter(&windup_mtx); timecounter->tc_freq_adj = *new; + tc_windup(NULL, NULL, NULL); + mtx_leave(&windup_mtx); } - return 0; } void @@ -767,6 +786,7 @@ tc_adjtime(int64_t *old, int64_t *new) } while (gen == 0 || gen != th->th_generation); } if (new != NULL) { + rw_assert_wrlock(&tc_lock); mtx_enter(&windup_mtx); tc_windup(NULL, NULL, new); mtx_leave(&windup_mtx); diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index 5dc32a29b14..b14bbb05246 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_time.c,v 1.111 2019/03/10 21:16:15 cheloha Exp $ */ +/* $OpenBSD: kern_time.c,v 1.112 2019/03/25 23:32:00 cheloha Exp $ */ /* $NetBSD: kern_time.c,v 1.20 1996/02/18 11:57:06 fvdl Exp $ */ /* @@ -36,6 +36,7 @@ #include <sys/resourcevar.h> #include <sys/kernel.h> #include <sys/systm.h> +#include <sys/rwlock.h> #include <sys/proc.h> #include <sys/ktrace.h> #include <sys/vnode.h> @@ -389,21 +390,25 @@ sys_adjfreq(struct proc *p, void *v, register_t *retval) int64_t f; const int64_t *freq = SCARG(uap, freq); int64_t *oldfreq = SCARG(uap, oldfreq); - if (oldfreq) { - if ((error = tc_adjfreq(&f, NULL))) - return (error); - if ((error = copyout(&f, oldfreq, sizeof(f)))) - return (error); - } + if (freq) { if ((error = suser(p))) return (error); if ((error = copyin(freq, &f, sizeof(f)))) return (error); - if ((error = tc_adjfreq(NULL, &f))) - return (error); } - return (0); + + rw_enter(&tc_lock, (freq == NULL) ? RW_READ : RW_WRITE); + if (oldfreq) { + tc_adjfreq(&f, NULL); + if ((error = copyout(&f, oldfreq, sizeof(f)))) + goto out; + } + if (freq) + tc_adjfreq(NULL, &f); +out: + rw_exit(&tc_lock); + return (error); } int @@ -423,6 +428,20 @@ sys_adjtime(struct proc *p, void *v, register_t *retval) if (error) return error; + if (delta) { + if ((error = suser(p))) + return (error); + if ((error = copyin(delta, &atv, sizeof(struct timeval)))) + return (error); + if (!timerisvalid(&atv)) + return (EINVAL); + + /* XXX Check for overflow? */ + adjustment = (int64_t)atv.tv_sec * 1000000 + atv.tv_usec; + + rw_enter_write(&tc_lock); + } + if (olddelta) { tc_adjtime(&remaining, NULL); memset(&atv, 0, sizeof(atv)); @@ -434,25 +453,15 @@ sys_adjtime(struct proc *p, void *v, register_t *retval) } if ((error = copyout(&atv, olddelta, sizeof(struct timeval)))) - return (error); + goto out; } - if (delta) { - if ((error = suser(p))) - return (error); - - if ((error = copyin(delta, &atv, sizeof(struct timeval)))) - return (error); - - if (!timerisvalid(&atv)) - return (EINVAL); - - /* XXX Check for overflow? */ - adjustment = (int64_t)atv.tv_sec * 1000000 + atv.tv_usec; + if (delta) tc_adjtime(NULL, &adjustment); - } - - return (0); +out: + if (delta) + rw_exit_write(&tc_lock); + return (error); } diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h index 0be79eea736..7340a60b89a 100644 --- a/sys/sys/timetc.h +++ b/sys/sys/timetc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: timetc.h,v 1.7 2019/03/10 21:16:15 cheloha Exp $ */ +/* $OpenBSD: timetc.h,v 1.8 2019/03/25 23:32:00 cheloha Exp $ */ /* * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org> @@ -43,40 +43,50 @@ struct timecounter; typedef u_int timecounter_get_t(struct timecounter *); typedef void timecounter_pps_t(struct timecounter *); +/* + * Locks used to protect struct members in this file: + * I immutable after initialization + * t tc_lock + * w windup_mtx + */ + struct timecounter { - timecounter_get_t *tc_get_timecount; + timecounter_get_t *tc_get_timecount; /* [I] */ /* * This function reads the counter. It is not required to * mask any unimplemented bits out, as long as they are * constant. */ - timecounter_pps_t *tc_poll_pps; + timecounter_pps_t *tc_poll_pps; /* [I] */ /* * This function is optional. It will be called whenever the * timecounter is rewound, and is intended to check for PPS * events. Normal hardware does not need it but timecounters * which latch PPS in hardware (like sys/pci/xrpu.c) do. */ - u_int tc_counter_mask; + u_int tc_counter_mask; /* [I] */ /* This mask should mask off any unimplemented bits. */ - u_int64_t tc_frequency; + u_int64_t tc_frequency; /* [I] */ /* Frequency of the counter in Hz. */ - char *tc_name; + char *tc_name; /* [I] */ /* Name of the timecounter. */ - int tc_quality; + int tc_quality; /* [I] */ /* * Used to determine if this timecounter is better than * another timecounter higher means better. Negative * means "only use at explicit request". */ - void *tc_priv; + void *tc_priv; /* [I] */ /* Pointer to the timecounter's private parts. */ - struct timecounter *tc_next; + struct timecounter *tc_next; /* [I] */ /* Pointer to the next timecounter. */ - int64_t tc_freq_adj; + int64_t tc_freq_adj; /* [tw] */ /* Current frequency adjustment. */ }; +struct rwlock; +extern struct rwlock tc_lock; + extern struct timecounter *timecounter; u_int64_t tc_getfrequency(void); @@ -86,7 +96,7 @@ void tc_setrealtimeclock(const struct timespec *ts); void tc_ticktock(void); void inittimecounter(void); int sysctl_tc(int *, u_int, void *, size_t *, void *, size_t); -int tc_adjfreq(int64_t *, int64_t *); +void tc_adjfreq(int64_t *, int64_t *); void tc_adjtime(int64_t *, int64_t *); #endif /* !_SYS_TIMETC_H_ */ |