summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorclaudio <claudio@openbsd.org>2019-06-20 13:18:19 +0000
committerclaudio <claudio@openbsd.org>2019-06-20 13:18:19 +0000
commit3f1c635632219903ee6e32286ec5f0a2de5127b5 (patch)
treee972bb7498bfbdf91f45e88ea8a6577dc0c87b87
parenttweaks with help from jmc@ (diff)
downloadwireguard-openbsd-3f1c635632219903ee6e32286ec5f0a2de5127b5.tar.xz
wireguard-openbsd-3f1c635632219903ee6e32286ec5f0a2de5127b5.zip
Change nexthop_update to run the list walk over all prefixes to run
asynchronously and therefor other tasks can make progress at the same time. Additionally prefixes belonging to a RIB which does not run the the decision process are no longer linked into the nexthop list. This replaces the early return in prefix_updateall() and reduces the time spent in nexthop_update(). OK benno@
-rw-r--r--usr.sbin/bgpd/rde.c12
-rw-r--r--usr.sbin/bgpd/rde.h10
-rw-r--r--usr.sbin/bgpd/rde_rib.c158
3 files changed, 123 insertions, 57 deletions
diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c
index 58588a93127..a3c30b56a5c 100644
--- a/usr.sbin/bgpd/rde.c
+++ b/usr.sbin/bgpd/rde.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde.c,v 1.470 2019/06/17 13:35:43 claudio Exp $ */
+/* $OpenBSD: rde.c,v 1.471 2019/06/20 13:18:19 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -256,9 +256,6 @@ rde_main(int debug, int verbose)
set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl);
- if (rib_dump_pending() || rde_update_queue_pending())
- timeout = 0;
-
i = PFD_PIPE_COUNT;
for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) {
xmctx = LIST_NEXT(mctx, entry);
@@ -275,6 +272,10 @@ rde_main(int debug, int verbose)
}
}
+ if (rib_dump_pending() || rde_update_queue_pending() ||
+ nexthop_pending())
+ timeout = 0;
+
if (poll(pfd, i, timeout) == -1) {
if (errno != EINTR)
fatal("poll error");
@@ -311,12 +312,13 @@ rde_main(int debug, int verbose)
mctx = LIST_NEXT(mctx, entry);
}
+ rib_dump_runner();
+ nexthop_runner();
if (ibuf_se && ibuf_se->w.queued < SESS_MSG_HIGH_MARK) {
rde_update_queue_runner();
for (aid = AID_INET6; aid < AID_MAX; aid++)
rde_update6_queue_runner(aid);
}
- rib_dump_runner();
}
/* do not clean up on shutdown on production, it takes ages. */
diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h
index 36fff8e2bf3..834acfbab9a 100644
--- a/usr.sbin/bgpd/rde.h
+++ b/usr.sbin/bgpd/rde.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde.h,v 1.214 2019/06/17 13:35:43 claudio Exp $ */
+/* $OpenBSD: rde.h,v 1.215 2019/06/20 13:18:19 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and
@@ -228,12 +228,15 @@ struct rde_aspath {
enum nexthop_state {
NEXTHOP_LOOKUP,
NEXTHOP_UNREACH,
- NEXTHOP_REACH
+ NEXTHOP_REACH,
+ NEXTHOP_FLAPPED
};
struct nexthop {
LIST_ENTRY(nexthop) nexthop_l;
+ TAILQ_ENTRY(nexthop) runner_l;
struct prefix_list prefix_h;
+ struct prefix *next_prefix;
struct bgpd_addr exit_nexthop;
struct bgpd_addr true_nexthop;
struct bgpd_addr nexthop_net;
@@ -246,6 +249,7 @@ struct nexthop {
#endif
int refcnt;
enum nexthop_state state;
+ enum nexthop_state oldstate;
u_int8_t nexthop_netlen;
u_int8_t flags;
#define NEXTHOP_CONNECTED 0x01
@@ -593,6 +597,8 @@ prefix_vstate(struct prefix *p)
void nexthop_init(u_int32_t);
void nexthop_shutdown(void);
+int nexthop_pending(void);
+void nexthop_runner(void);
void nexthop_modify(struct nexthop *, enum action_types, u_int8_t,
struct nexthop **, u_int8_t *);
void nexthop_link(struct prefix *);
diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c
index 03f4f353414..fe0da884421 100644
--- a/usr.sbin/bgpd/rde_rib.c
+++ b/usr.sbin/bgpd/rde_rib.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rde_rib.c,v 1.191 2019/06/17 11:02:20 claudio Exp $ */
+/* $OpenBSD: rde_rib.c,v 1.192 2019/06/20 13:18:19 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
@@ -517,13 +517,15 @@ path_empty(struct rde_aspath *asp)
return (asp == NULL || asp->refcnt <= 0);
}
-static inline void
+static inline struct rde_aspath *
path_ref(struct rde_aspath *asp)
{
if ((asp->flags & F_ATTR_LINKED) == 0)
fatalx("%s: unlinked object", __func__);
asp->refcnt++;
rdemem.path_refs++;
+
+ return asp;
}
static inline void
@@ -955,19 +957,16 @@ prefix_move(struct prefix *p, struct rde_peer *peer,
fatalx("prefix_move: cross peer move");
np = prefix_alloc();
- np->aspath = asp;
- np->communities = comm;
- np->nexthop = nexthop_ref(nexthop);
- nexthop_link(np);
+ /* add reference to new AS path and communities */
+ np->aspath = path_ref(asp);
+ np->communities = communities_get(comm);
np->peer = peer;
np->re = p->re;
- np->lastchange = time(NULL);
- np->nhflags = nhflags;
np->validation_state = vstate;
-
- /* add reference to new AS path and communities */
- path_ref(asp);
- communities_get(comm);
+ np->nhflags = nhflags;
+ np->nexthop = nexthop_ref(nexthop);
+ nexthop_link(np);
+ np->lastchange = time(NULL);
/*
* no need to update the peer prefix count because we are only moving
@@ -988,18 +987,18 @@ prefix_move(struct prefix *p, struct rde_peer *peer,
/* remove old prefix node */
/* as before peer count needs no update because of move */
oasp = p->aspath;
- if (oasp)
- path_unref(oasp);
/* destroy all references to other objects and free the old prefix */
- p->aspath = NULL;
+ nexthop_unlink(p);
+ nexthop_put(p->nexthop);
communities_put(p->communities);
+ if (oasp)
+ path_unref(oasp);
p->communities = NULL;
+ p->nexthop = NULL;
+ p->aspath = NULL;
p->peer = NULL;
p->re = NULL;
- nexthop_unlink(p);
- nexthop_put(p->nexthop);
- p->nexthop = NULL;
prefix_free(p);
/* destroy old path if empty */
@@ -1252,19 +1251,23 @@ prefix_updateall(struct prefix *p, enum nexthop_state state,
enum nexthop_state oldstate)
{
/* Skip non local-RIBs or RIBs that are flagged as noeval. */
- if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
+ if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) {
+ log_warnx("%s: prefix with F_RIB_NOEVALUATE hit", __func__);
return;
+ }
- if (oldstate == state && state == NEXTHOP_REACH) {
+ if (oldstate == state) {
/*
* The state of the nexthop did not change. The only
* thing that may have changed is the true_nexthop
* or other internal infos. This will not change
* the routing decision so shortcut here.
*/
- if ((re_rib(p->re)->flags & F_RIB_NOFIB) == 0 &&
- p == p->re->active)
- rde_send_kroute(re_rib(p->re), p, NULL);
+ if (state == NEXTHOP_REACH) {
+ if ((re_rib(p->re)->flags & F_RIB_NOFIB) == 0 &&
+ p == p->re->active)
+ rde_send_kroute(re_rib(p->re), p, NULL);
+ }
return;
}
@@ -1305,17 +1308,15 @@ prefix_link(struct prefix *p, struct rib_entry *re, struct rde_peer *peer,
struct rde_aspath *asp, struct rde_community *comm,
struct nexthop *nexthop, u_int8_t nhflags, u_int8_t vstate)
{
- path_ref(asp);
-
- p->aspath = asp;
- p->peer = peer;
+ p->aspath = path_ref(asp);
p->communities = communities_get(comm);
+ p->peer = peer;
+ p->re = re;
+ p->validation_state = vstate;
+ p->nhflags = nhflags;
p->nexthop = nexthop_ref(nexthop);
nexthop_link(p);
- p->re = re;
p->lastchange = time(NULL);
- p->nhflags = nhflags;
- p->validation_state = vstate;
/* make route decision */
prefix_evaluate(p, re);
@@ -1336,22 +1337,21 @@ prefix_unlink(struct prefix *p)
LIST_REMOVE(p, rib_l);
prefix_evaluate(NULL, re);
- if (p->aspath)
- path_unref(p->aspath);
-
- if (rib_empty(re))
- rib_remove(re);
-
/* destroy all references to other objects */
- communities_put(p->communities);
nexthop_unlink(p);
nexthop_put(p->nexthop);
+ communities_put(p->communities);
+ if (p->aspath)
+ path_unref(p->aspath);
p->communities = NULL;
p->nexthop = NULL;
p->aspath = NULL;
p->peer = NULL;
p->re = NULL;
+ if (rib_empty(re))
+ rib_remove(re);
+
/*
* It's the caller's duty to do accounting and remove empty aspath
* structures. Also freeing the unlinked prefix is the caller's duty.
@@ -1401,6 +1401,8 @@ struct nexthop_table {
SIPHASH_KEY nexthoptablekey;
+TAILQ_HEAD(nexthop_queue, nexthop) nexthop_runners;
+
void
nexthop_init(u_int32_t hashsize)
{
@@ -1412,6 +1414,7 @@ nexthop_init(u_int32_t hashsize)
if (nexthoptable.nexthop_hashtbl == NULL)
fatal("nextop_init");
+ TAILQ_INIT(&nexthop_runners);
for (i = 0; i < hs; i++)
LIST_INIT(&nexthoptable.nexthop_hashtbl[i]);
arc4random_buf(&nexthoptablekey, sizeof(nexthoptablekey));
@@ -1443,12 +1446,45 @@ nexthop_shutdown(void)
free(nexthoptable.nexthop_hashtbl);
}
+int
+nexthop_pending(void)
+{
+ return !TAILQ_EMPTY(&nexthop_runners);
+}
+
+void
+nexthop_runner(void)
+{
+ struct nexthop *nh;
+ struct prefix *p;
+ u_int32_t j;
+
+ nh = TAILQ_FIRST(&nexthop_runners);
+ if (nh == NULL)
+ return;
+
+ /* remove from runnner queue */
+ TAILQ_REMOVE(&nexthop_runners, nh, runner_l);
+
+ p = nh->next_prefix;
+ for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) {
+ prefix_updateall(p, nh->state, nh->oldstate);
+ p = LIST_NEXT(p, nexthop_l);
+ }
+
+ /* prep for next run, if not finished readd to tail of queue */
+ nh->next_prefix = p;
+ if (p != NULL)
+ TAILQ_INSERT_TAIL(&nexthop_runners, nh, runner_l);
+ else
+ log_debug("nexthop %s update finished",
+ log_addr(&nh->exit_nexthop));
+}
+
void
nexthop_update(struct kroute_nexthop *msg)
{
struct nexthop *nh;
- struct prefix *p;
- enum nexthop_state oldstate;
nh = nexthop_lookup(&msg->nexthop);
if (nh == NULL) {
@@ -1457,7 +1493,20 @@ nexthop_update(struct kroute_nexthop *msg)
return;
}
- oldstate = nh->state;
+ nh->oldstate = nh->state;
+ if (nh->oldstate == NEXTHOP_LOOKUP)
+ /* drop reference which was hold during the lookup */
+ if (nexthop_put(nh))
+ return; /* nh lost last ref, no work left */
+
+ if (nh->next_prefix) {
+ /*
+ * If nexthop_runner() is not finished with this nexthop
+ * then ensure that all prefixes are updated by setting
+ * the oldstate to NEXTHOP_FLAPPED.
+ */
+ nh->oldstate = NEXTHOP_FLAPPED;
+ }
if (msg->valid)
nh->state = NEXTHOP_REACH;
@@ -1476,21 +1525,16 @@ nexthop_update(struct kroute_nexthop *msg)
sizeof(nh->nexthop_net));
nh->nexthop_netlen = msg->netlen;
- if (oldstate == NEXTHOP_LOOKUP)
- /* drop reference which was hold during the lookup */
- if (nexthop_put(nh))
- return;
-
if (rde_noevaluate())
/*
* if the decision process is turned off there is no need
- * for the aspath list walk.
+ * for the prefix list walk.
*/
return;
- LIST_FOREACH(p, &nh->prefix_h, nexthop_l) {
- prefix_updateall(p, nh->state, oldstate);
- }
+ nh->next_prefix = LIST_FIRST(&nh->prefix_h);
+ TAILQ_INSERT_HEAD(&nexthop_runners, nh, runner_l);
+ log_debug("nexthop %s update starting", log_addr(&nh->exit_nexthop));
}
void
@@ -1532,6 +1576,10 @@ nexthop_link(struct prefix *p)
if (p->nexthop == NULL)
return;
+ /* no need to link prefixes in RIBs that have no decision process */
+ if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
+ return;
+
LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, nexthop_l);
}
@@ -1541,6 +1589,12 @@ nexthop_unlink(struct prefix *p)
if (p->nexthop == NULL)
return;
+ if (re_rib(p->re)->flags & F_RIB_NOEVALUATE)
+ return;
+
+ if (p == p->nexthop->next_prefix)
+ p->nexthop->next_prefix = LIST_NEXT(p, nexthop_l);
+
LIST_REMOVE(p, nexthop_l);
}
@@ -1588,7 +1642,11 @@ nexthop_put(struct nexthop *nh)
/* sanity check */
if (!LIST_EMPTY(&nh->prefix_h) || nh->state == NEXTHOP_LOOKUP)
- fatalx("nexthop_put: refcnt error");
+ fatalx("%s: refcnt error", __func__);
+
+ /* is nexthop update running? impossible, that is a refcnt error */
+ if (nh->next_prefix)
+ fatalx("%s: next_prefix not NULL", __func__);
LIST_REMOVE(nh, nexthop_l);
rde_send_nexthop(&nh->exit_nexthop, 0);