summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorclaudio <claudio@openbsd.org>2008-11-24 12:53:53 +0000
committerclaudio <claudio@openbsd.org>2008-11-24 12:53:53 +0000
commitc79b0376db813d069b2a4c1a948b6af178f1a47d (patch)
treeed0ee6d52ba00cff9249f9080906a8782079fd3b
parentdrop the requirement that the rx ring has to be filled with mbufs. we only (diff)
downloadwireguard-openbsd-c79b0376db813d069b2a4c1a948b6af178f1a47d.tar.xz
wireguard-openbsd-c79b0376db813d069b2a4c1a948b6af178f1a47d.zip
Implement link-state tracking on the routing table. Routes to interfaces
which are considered down will now be marked ~RTF_UP and so multipath routing will start to work as expected and not pump 50% of the traffic to nirvana. Most of the magic happens in rn_mpath_reprio() which fiddles with the routing table internals. The rest is more straightforward. get it in deraadt@
-rw-r--r--sys/net/if.c11
-rw-r--r--sys/net/radix_mpath.c103
-rw-r--r--sys/net/radix_mpath.h3
-rw-r--r--sys/net/route.c71
-rw-r--r--sys/net/route.h6
5 files changed, 187 insertions, 7 deletions
diff --git a/sys/net/if.c b/sys/net/if.c
index 916df0176eb..8bfb97df149 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: if.c,v 1.175 2008/11/21 18:01:30 claudio Exp $ */
+/* $OpenBSD: if.c,v 1.176 2008/11/24 12:53:53 claudio Exp $ */
/* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */
/*
@@ -1066,6 +1066,9 @@ if_down(struct ifnet *ifp)
bstp_ifstate(ifp);
#endif
rt_ifmsg(ifp);
+#ifndef SMALL_KERNEL
+ rt_if_track(ifp);
+#endif
}
/*
@@ -1102,6 +1105,9 @@ if_up(struct ifnet *ifp)
#ifdef INET6
in6_if_up(ifp);
#endif
+#ifndef SMALL_KERNEL
+ rt_if_track(ifp);
+#endif
}
/*
@@ -1112,6 +1118,9 @@ void
if_link_state_change(struct ifnet *ifp)
{
rt_ifmsg(ifp);
+#ifndef SMALL_KERNEL
+ rt_if_track(ifp);
+#endif
dohooks(ifp->if_linkstatehooks, 0);
}
diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c
index c2718c206c0..933a04af505 100644
--- a/sys/net/radix_mpath.c
+++ b/sys/net/radix_mpath.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: radix_mpath.c,v 1.12 2008/11/21 18:01:30 claudio Exp $ */
+/* $OpenBSD: radix_mpath.c,v 1.13 2008/11/24 12:53:53 claudio Exp $ */
/* $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $ */
/*
@@ -57,6 +57,9 @@ u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
* give some jitter to hash, to avoid synchronization between routers
*/
static u_int32_t hashjitter;
+#ifdef RN_DEBUG
+extern struct radix_node *rn_clist;
+#endif
int
rn_mpath_capable(struct radix_node_head *rnh)
@@ -106,6 +109,104 @@ rn_mpath_prio(struct radix_node *rn, u_int8_t prio)
return (prev);
}
+/*
+ * Change the priority of the route embedded in rn to newprio and, if
+ * necessary, relink rn inside its dupedkey chain so that it sits at the
+ * place rn_mpath_prio()/rn_mpath_count() select for the new priority.
+ *
+ * rn		radix node of the route; it is also accessed as a
+ *		struct rtentry through a cast.
+ * newprio	priority to store in rt_priority.
+ *
+ * The new priority is always stored.  The node is left where it is when
+ * the priority did not change, when rn_mpath_next(rn, 1) finds no
+ * successor and the parent carries a different mask, or when there is no
+ * successor and the priority value grows.
+ */
+void
+rn_mpath_reprio(struct radix_node *rn, int newprio)
+{
+	struct radix_node	*prev = rn->rn_p;
+	struct radix_node	*next = rn->rn_dupedkey;
+	struct radix_node	*t, *tt, *saved_tt;
+	struct rtentry		*rt = (struct rtentry *)rn;
+	int			 mid, oldprio, prioinv = 0;
+
+	oldprio = rt->rt_priority;
+	rt->rt_priority = newprio;
+
+	/* same prio, no change needed */
+	if (oldprio == newprio)
+		return;
+	if (rn_mpath_next(rn, 1) == NULL) {
+		/* no need to move node, route is alone */
+		if (prev->rn_mask != rn->rn_mask)
+			return;
+		/* ... or route is last and prio gets bigger */
+		if (oldprio < newprio)
+			return;
+	}
+
+	/* remove node from dupedkey list and reinsert at correct place */
+	if (prev->rn_dupedkey == rn) {
+		/* rn hangs off a predecessor in the chain: bypass it */
+		prev->rn_dupedkey = next;
+		if (next)
+			next->rn_p = prev;
+		else
+			next = prev;	/* rn was last, search from prev */
+	} else {
+		/* rn is referenced through rn_l/rn_r of prev */
+		if (next == NULL)
+			panic("next == NULL");
+		next->rn_p = prev;
+		if (prev->rn_l == rn)
+			prev->rn_l = next;
+		else
+			prev->rn_r = next;
+	}
+
+	/* re-insert rn at the right spot */
+	/* walk rn_p links while the parent still carries rn's mask */
+	for (tt = next; tt->rn_p->rn_mask == rn->rn_mask;
+	    tt = tt->rn_p)
+		;
+	saved_tt = tt;
+
+	/*
+	 * Stolen from radix.c rn_addroute().
+	 * This is nasty code with a certain amount of magic and dragons.
+	 * t is the element where the re-priorized rn is inserted -- before
+	 * or after depending on prioinv. tt and saved_tt are just helpers.
+	 */
+	tt = rn_mpath_prio(tt, newprio);
+	if (((struct rtentry *)tt)->rt_priority != newprio) {
+		/* no route with newprio yet */
+		if (((struct rtentry *)tt)->rt_priority > newprio)
+			prioinv = 1;
+		t = tt;
+	} else {
+		/* step through half of the routes counted by rn_mpath_count */
+		mid = rn_mpath_count(tt) / 2;
+		do {
+			t = tt;
+			tt = rn_mpath_next(tt, 0);
+		} while (tt && --mid > 0);
+	}
+
+	if (tt == saved_tt && prioinv) {
+		/* link in at head of list */
+		rn->rn_dupedkey = tt;
+		rn->rn_p = tt->rn_p;
+		tt->rn_p = rn;
+		if (rn->rn_p->rn_l == tt)
+			rn->rn_p->rn_l = rn;
+		else
+			rn->rn_p->rn_r = rn;
+	} else if (prioinv == 1) {
+		/* link in before t */
+		rn->rn_dupedkey = t;
+		t->rn_p->rn_dupedkey = rn;
+		rn->rn_p = t->rn_p;
+		t->rn_p = rn;
+	} else {
+		/* link in after t */
+		rn->rn_dupedkey = t->rn_dupedkey;
+		t->rn_dupedkey = rn;
+		rn->rn_p = t;
+		if (rn->rn_dupedkey)
+			rn->rn_dupedkey->rn_p = rn;
+	}
+#ifdef RN_DEBUG
+	/*
+	 * readd at head of creation list
+	 *
+	 * If rn already is the head there is nothing to do; relinking it
+	 * would make rn->rn_ybro point at rn itself.
+	 */
+	if (rn_clist != rn) {
+		/* find the node whose rn_ybro is rn; t must advance each turn */
+		for (t = rn_clist; t && t->rn_ybro != rn; t = t->rn_ybro)
+			;
+		if (t)
+			t->rn_ybro = rn->rn_ybro;
+		rn->rn_ybro = rn_clist;
+		rn_clist = rn;
+	}
+#endif
+}
+
int
rn_mpath_count(struct radix_node *rn)
{
diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h
index 2829397870a..ad0871e7a26 100644
--- a/sys/net/radix_mpath.h
+++ b/sys/net/radix_mpath.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: radix_mpath.h,v 1.7 2008/11/21 18:01:30 claudio Exp $ */
+/* $OpenBSD: radix_mpath.h,v 1.8 2008/11/24 12:53:53 claudio Exp $ */
/* $KAME: radix_mpath.h,v 1.9 2004/03/30 11:21:49 keiichi Exp $ */
/*
@@ -47,6 +47,7 @@ struct sockaddr;
int rn_mpath_capable(struct radix_node_head *);
struct radix_node *rn_mpath_next(struct radix_node *, int);
struct radix_node *rn_mpath_prio(struct radix_node *, u_int8_t);
+void rn_mpath_reprio(struct radix_node *, int);
int rn_mpath_count(struct radix_node *);
struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *,
u_int8_t);
diff --git a/sys/net/route.c b/sys/net/route.c
index f35317bbf7d..a9feb004fb0 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: route.c,v 1.98 2008/11/21 18:01:30 claudio Exp $ */
+/* $OpenBSD: route.c,v 1.99 2008/11/24 12:53:53 claudio Exp $ */
/* $NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $ */
/*
@@ -144,10 +144,12 @@ struct pool rttimer_pool; /* pool for rttimer structures */
int rtable_init(struct radix_node_head ***);
int okaytoclone(u_int, int);
-int rtdeletemsg(struct rtentry *, u_int);
int rtflushclone1(struct radix_node *, void *);
void rtflushclone(struct radix_node_head *, struct rtentry *);
int rt_if_remove_rtdelete(struct radix_node *, void *);
+#ifndef SMALL_KERNEL
+int rt_if_linkstate_change(struct radix_node *, void *);
+#endif
#define LABELID_MAX 50000
@@ -399,6 +401,8 @@ rtfree(struct rtentry *rt)
rt->rt_refcnt--;
if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
+ if (rt->rt_refcnt == 0 && (rt->rt_nodes->rn_flags & RNF_ACTIVE))
+ return; /* route still active but currently down */
if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
panic("rtfree 2");
rttrash--;
@@ -811,8 +815,16 @@ makeroute:
if (rt == NULL)
senderr(ENOBUFS);
Bzero(rt, sizeof(*rt));
- rt->rt_flags = RTF_UP | info->rti_flags;
+ rt->rt_flags = info->rti_flags;
rt->rt_priority = prio; /* init routing priority */
+ if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) ||
+ ifa->ifa_ifp->if_link_state == LINK_STATE_UNKNOWN) &&
+ ifa->ifa_ifp->if_flags & IFF_UP)
+ rt->rt_flags |= RTF_UP;
+ else {
+ rt->rt_flags &= ~RTF_UP;
+ rt->rt_priority |= RTP_DOWN;
+ }
LIST_INIT(&rt->rt_timer);
if (rt_setgate(rt, info->rti_info[RTAX_DST],
info->rti_info[RTAX_GATEWAY], tableid)) {
@@ -1395,3 +1407,56 @@ rt_if_remove_rtdelete(struct radix_node *rn, void *vifp)
return (0);
}
+
+#ifndef SMALL_KERNEL
+/*
+ * Run rt_if_linkstate_change() over every multipath capable routing
+ * table, for all table ids up to rtbl_id_max and all address families
+ * from 1 to AF_MAX, passing ifp as the walker argument.
+ * Returns immediately when no routing tables have been set up.
+ */
+void
+rt_if_track(struct ifnet *ifp)
+{
+	struct radix_node_head	*head;
+	u_int			 table;
+	int			 af, error;
+
+	if (rt_tables == NULL)
+		return;
+
+	for (table = 0; table <= rtbl_id_max; table++) {
+		for (af = 1; af <= AF_MAX; af++) {
+			head = rt_gettable(af, table);
+			if (head == NULL || !rn_mpath_capable(head))
+				continue;
+
+			/* walk again for as long as the walker reports EAGAIN */
+			do {
+				error = (*head->rnh_walktree)(head,
+				    rt_if_linkstate_change, ifp);
+			} while (error == EAGAIN);
+		}
+	}
+}
+
+/*
+ * Tree walker callback used by rt_if_track().
+ *
+ * rn	radix node of the route being visited (also a struct rtentry).
+ * arg	the struct ifnet whose state changed.
+ *
+ * Routes that do not use the interface are skipped.  For the others
+ * RTF_UP is set or cleared to follow the interface state, and the route
+ * is re-prioritized on every transition.  if_group_routechange() is
+ * called for every matching route, whether or not its flags changed.
+ * Always returns 0.
+ */
+int
+rt_if_linkstate_change(struct radix_node *rn, void *arg)
+{
+	struct ifnet	*ifp = arg;
+	struct rtentry	*rt = (struct rtentry *)rn;
+	int		 usable;
+
+	if (rt->rt_ifp != ifp)
+		return (0);
+
+	/* usable: link is up or unknown, and the interface has IFF_UP set */
+	usable = (LINK_STATE_IS_UP(ifp->if_link_state) ||
+	    ifp->if_link_state == LINK_STATE_UNKNOWN) &&
+	    (ifp->if_flags & IFF_UP);
+
+	if (usable && !(rt->rt_flags & RTF_UP)) {
+		/* bring route up */
+		rt->rt_flags |= RTF_UP;
+		rn_mpath_reprio(rn, rt->rt_priority & RTP_MASK);
+	} else if (!usable && (rt->rt_flags & RTF_UP)) {
+		/* take route down */
+		rt->rt_flags &= ~RTF_UP;
+		rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN);
+	}
+
+	if_group_routechange(rt_key(rt), rt_mask(rt));
+	return (0);
+}
+#endif
diff --git a/sys/net/route.h b/sys/net/route.h
index 51effca0d05..a42efc2c139 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: route.h,v 1.53 2008/11/07 19:09:03 deraadt Exp $ */
+/* $OpenBSD: route.h,v 1.54 2008/11/24 12:53:53 claudio Exp $ */
/* $NetBSD: route.h,v 1.9 1996/02/13 22:00:49 christos Exp $ */
/*
@@ -413,6 +413,10 @@ void rtredirect(struct sockaddr *, struct sockaddr *,
int rtrequest1(int, struct rt_addrinfo *, u_int8_t, struct rtentry **,
u_int);
void rt_if_remove(struct ifnet *);
+#ifndef SMALL_KERNEL
+void rt_if_track(struct ifnet *);
+#endif
+int rtdeletemsg(struct rtentry *, u_int);
struct radix_node_head *rt_gettable(sa_family_t, u_int);
struct radix_node *rt_lookup(struct sockaddr *, struct sockaddr *, int);