diff options
author | 2008-11-24 12:53:53 +0000 | |
---|---|---|
committer | 2008-11-24 12:53:53 +0000 | |
commit | c79b0376db813d069b2a4c1a948b6af178f1a47d (patch) | |
tree | ed0ee6d52ba00cff9249f9080906a8782079fd3b | |
parent | drop the requirement that the rx ring has to be filled with mbufs. we only (diff) | |
download | wireguard-openbsd-c79b0376db813d069b2a4c1a948b6af178f1a47d.tar.xz wireguard-openbsd-c79b0376db813d069b2a4c1a948b6af178f1a47d.zip |
Implement link-state tracking on the routing table. Routes to interfaces
which are considered down will now be marked ~RTF_UP and so multipath routing
will start to work as expected and not pump 50% of the traffic to nirvana.
Most of the magic happens in rn_mpath_reprio() which fiddles with the
routing table internals. The rest is more straightforward.
get it in deraadt@
-rw-r--r-- | sys/net/if.c | 11 | ||||
-rw-r--r-- | sys/net/radix_mpath.c | 103 | ||||
-rw-r--r-- | sys/net/radix_mpath.h | 3 | ||||
-rw-r--r-- | sys/net/route.c | 71 | ||||
-rw-r--r-- | sys/net/route.h | 6 |
5 files changed, 187 insertions, 7 deletions
diff --git a/sys/net/if.c b/sys/net/if.c
index 916df0176eb..8bfb97df149 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: if.c,v 1.175 2008/11/21 18:01:30 claudio Exp $	*/
+/*	$OpenBSD: if.c,v 1.176 2008/11/24 12:53:53 claudio Exp $	*/
 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
 
 /*
@@ -1066,6 +1066,9 @@ if_down(struct ifnet *ifp)
 	bstp_ifstate(ifp);
 #endif
 	rt_ifmsg(ifp);
+#ifndef SMALL_KERNEL
+	rt_if_track(ifp);
+#endif
 }
 
 /*
@@ -1102,6 +1105,9 @@ if_up(struct ifnet *ifp)
 #ifdef INET6
 	in6_if_up(ifp);
 #endif
+#ifndef SMALL_KERNEL
+	rt_if_track(ifp);
+#endif
 }
 
 /*
@@ -1112,6 +1118,9 @@ void
 if_link_state_change(struct ifnet *ifp)
 {
 	rt_ifmsg(ifp);
+#ifndef SMALL_KERNEL
+	rt_if_track(ifp);
+#endif
 	dohooks(ifp->if_linkstatehooks, 0);
 }
 
diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c
index c2718c206c0..933a04af505 100644
--- a/sys/net/radix_mpath.c
+++ b/sys/net/radix_mpath.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: radix_mpath.c,v 1.12 2008/11/21 18:01:30 claudio Exp $	*/
+/*	$OpenBSD: radix_mpath.c,v 1.13 2008/11/24 12:53:53 claudio Exp $	*/
 /*	$KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $	*/
 
 /*
@@ -57,6 +57,9 @@ u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
  * give some jitter to hash, to avoid synchronization between routers
  */
 static u_int32_t hashjitter;
+#ifdef RN_DEBUG
+extern struct radix_node *rn_clist;
+#endif
 
 int
 rn_mpath_capable(struct radix_node_head *rnh)
@@ -106,6 +109,104 @@ rn_mpath_prio(struct radix_node *rn, u_int8_t prio)
 	return (prev);
 }
 
+void
+rn_mpath_reprio(struct radix_node *rn, int newprio)
+{
+	struct radix_node *prev = rn->rn_p;
+	struct radix_node *next = rn->rn_dupedkey;
+	struct radix_node *t, *tt, *saved_tt;
+	struct rtentry *rt = (struct rtentry *)rn;
+	int mid, oldprio, prioinv = 0;
+
+	oldprio = rt->rt_priority;
+	rt->rt_priority = newprio;
+
+	/* same prio, no change needed */
+	if (oldprio == newprio)
+		return;
+	if (rn_mpath_next(rn, 1) == NULL) {
+		/* no need to move node, route is alone */
+		if (prev->rn_mask != rn->rn_mask)
+			return;
+		/* ... or route is last and prio gets bigger */
+		if (oldprio < newprio)
+			return;
+	}
+
+	/* remove node from dupedkey list and reinsert at correct place */
+	if (prev->rn_dupedkey == rn) {
+		prev->rn_dupedkey = next;
+		if (next)
+			next->rn_p = prev;
+		else
+			next = prev;
+	} else {
+		if (next == NULL)
+			panic("next == NULL");
+		next->rn_p = prev;
+		if (prev->rn_l == rn)
+			prev->rn_l = next;
+		else
+			prev->rn_r = next;
+	}
+
+	/* re-insert rn at the right spot */
+	for (tt = next; tt->rn_p->rn_mask == rn->rn_mask;
+	    tt = tt->rn_p)
+		;
+	saved_tt = tt;
+
+	/*
+	 * Stolen from radix.c rn_addroute().
+	 * This is nasty code with a certain amount of magic and dragons.
+	 * t is the element where the re-priorized rn is inserted -- before
+	 * or after depending on prioinv. tt and saved_tt are just helpers.
+	 */
+	tt = rn_mpath_prio(tt, newprio);
+	if (((struct rtentry *)tt)->rt_priority != newprio) {
+		if (((struct rtentry *)tt)->rt_priority > newprio)
+			prioinv = 1;
+		t = tt;
+	} else {
+		mid = rn_mpath_count(tt) / 2;
+		do {
+			t = tt;
+			tt = rn_mpath_next(tt, 0);
+		} while (tt && --mid > 0);
+	}
+
+	if (tt == saved_tt && prioinv) {
+		/* link in at head of list */
+		rn->rn_dupedkey = tt;
+		rn->rn_p = tt->rn_p;
+		tt->rn_p = rn;
+		if (rn->rn_p->rn_l == tt)
+			rn->rn_p->rn_l = rn;
+		else
+			rn->rn_p->rn_r = rn;
+	} else if (prioinv == 1) {
+		rn->rn_dupedkey = t;
+		t->rn_p->rn_dupedkey = rn;
+		rn->rn_p = t->rn_p;
+		t->rn_p = rn;
+	} else {
+		rn->rn_dupedkey = t->rn_dupedkey;
+		t->rn_dupedkey = rn;
+		rn->rn_p = t;
+		if (rn->rn_dupedkey)
+			rn->rn_dupedkey->rn_p = rn;
+	}
+#ifdef RN_DEBUG
+	/* unlink rn from the creation list, then readd it at the head */
+	for (t = rn_clist; t && t->rn_ybro != rn; t = t->rn_ybro)
+		;
+	if (t)
+		t->rn_ybro = rn->rn_ybro;
+	rn->rn_ybro = rn_clist;
+	rn_clist = rn;
+#endif
+}
+
 int
 rn_mpath_count(struct radix_node *rn)
 {
diff --git a/sys/net/radix_mpath.h b/sys/net/radix_mpath.h
index 2829397870a..ad0871e7a26 100644
--- a/sys/net/radix_mpath.h
+++ b/sys/net/radix_mpath.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: radix_mpath.h,v 1.7 2008/11/21 18:01:30 claudio Exp $	*/
+/*	$OpenBSD: radix_mpath.h,v 1.8 2008/11/24 12:53:53 claudio Exp $	*/
 /*	$KAME: radix_mpath.h,v 1.9 2004/03/30 11:21:49 keiichi Exp $	*/
 
 /*
@@ -47,6 +47,7 @@ struct sockaddr;
 int	rn_mpath_capable(struct radix_node_head *);
 struct radix_node *rn_mpath_next(struct radix_node *, int);
 struct radix_node *rn_mpath_prio(struct radix_node *, u_int8_t);
+void	rn_mpath_reprio(struct radix_node *, int);
 int	rn_mpath_count(struct radix_node *);
 struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *,
 	    u_int8_t);
diff --git a/sys/net/route.c b/sys/net/route.c
index f35317bbf7d..a9feb004fb0 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: route.c,v 1.98 2008/11/21 18:01:30 claudio Exp $	*/
+/*	$OpenBSD: route.c,v 1.99 2008/11/24 12:53:53 claudio Exp $	*/
 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
 
 /*
@@ -144,10 +144,12 @@ struct pool rttimer_pool; /* pool for rttimer structures */
 
 int	rtable_init(struct radix_node_head ***);
 int	okaytoclone(u_int, int);
-int	rtdeletemsg(struct rtentry *, u_int);
 int	rtflushclone1(struct radix_node *, void *);
 void	rtflushclone(struct radix_node_head *, struct rtentry *);
 int	rt_if_remove_rtdelete(struct radix_node *, void *);
+#ifndef SMALL_KERNEL
+int	rt_if_linkstate_change(struct radix_node *, void *);
+#endif
 
 #define	LABELID_MAX	50000
 
@@ -399,6 +401,8 @@ rtfree(struct rtentry *rt)
 	rt->rt_refcnt--;
 
 	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
+		if (rt->rt_refcnt == 0 && (rt->rt_nodes->rn_flags & RNF_ACTIVE))
+			return; /* route still active but currently down */
 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic("rtfree 2");
 		rttrash--;
@@ -811,8 +815,16 @@ makeroute:
 		if (rt == NULL)
 			senderr(ENOBUFS);
 		Bzero(rt, sizeof(*rt));
-		rt->rt_flags = RTF_UP | info->rti_flags;
+		rt->rt_flags = info->rti_flags;
 		rt->rt_priority = prio;	/* init routing priority */
+		if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) ||
+		    ifa->ifa_ifp->if_link_state == LINK_STATE_UNKNOWN) &&
+		    ifa->ifa_ifp->if_flags & IFF_UP)
+			rt->rt_flags |= RTF_UP;
+		else {
+			rt->rt_flags &= ~RTF_UP;
+			rt->rt_priority |= RTP_DOWN;
+		}
 		LIST_INIT(&rt->rt_timer);
 		if (rt_setgate(rt, info->rti_info[RTAX_DST],
 		    info->rti_info[RTAX_GATEWAY], tableid)) {
@@ -1395,3 +1407,56 @@ rt_if_remove_rtdelete(struct radix_node *rn, void *vifp)
 
 	return (0);
 }
+
+#ifndef SMALL_KERNEL
+void
+rt_if_track(struct ifnet *ifp)
+{
+	struct radix_node_head *rnh;
+	int i;
+	u_int tid;
+
+	if (rt_tables == NULL)
+		return;
+
+	for (tid = 0; tid <= rtbl_id_max; tid++) {
+		for (i = 1; i <= AF_MAX; i++) {
+			if ((rnh = rt_gettable(i, tid)) != NULL) {
+				if (!rn_mpath_capable(rnh))
+					continue;
+				while ((*rnh->rnh_walktree)(rnh,
+				    rt_if_linkstate_change, ifp) == EAGAIN)
+					;	/* nothing */
+			}
+		}
+	}
+}
+
+int
+rt_if_linkstate_change(struct radix_node *rn, void *arg)
+{
+	struct ifnet *ifp = arg;
+	struct rtentry *rt = (struct rtentry *)rn;
+
+	if (rt->rt_ifp == ifp) {
+		if ((LINK_STATE_IS_UP(ifp->if_link_state) ||
+		    ifp->if_link_state == LINK_STATE_UNKNOWN) &&
+		    ifp->if_flags & IFF_UP) {
+			if (!(rt->rt_flags & RTF_UP)) {
+				/* bring route up */
+				rt->rt_flags |= RTF_UP;
+				rn_mpath_reprio(rn, rt->rt_priority & RTP_MASK);
+			}
+		} else {
+			if (rt->rt_flags & RTF_UP) {
+				/* take route down */
+				rt->rt_flags &= ~RTF_UP;
+				rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN);
+			}
+		}
+		if_group_routechange(rt_key(rt), rt_mask(rt));
+	}
+
+	return (0);
+}
+#endif
diff --git a/sys/net/route.h b/sys/net/route.h
index 51effca0d05..a42efc2c139 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: route.h,v 1.53 2008/11/07 19:09:03 deraadt Exp $	*/
+/*	$OpenBSD: route.h,v 1.54 2008/11/24 12:53:53 claudio Exp $	*/
 /*	$NetBSD: route.h,v 1.9 1996/02/13 22:00:49 christos Exp $	*/
 
 /*
@@ -413,6 +413,10 @@ void	 rtredirect(struct sockaddr *, struct sockaddr *,
 int	 rtrequest1(int, struct rt_addrinfo *, u_int8_t, struct rtentry **,
 	     u_int);
 void	 rt_if_remove(struct ifnet *);
+#ifndef SMALL_KERNEL
+void	 rt_if_track(struct ifnet *);
+#endif
+int	 rtdeletemsg(struct rtentry *, u_int);
 
 struct radix_node_head	*rt_gettable(sa_family_t, u_int);
 struct radix_node	*rt_lookup(struct sockaddr *, struct sockaddr *, int);