diff options
author | 2019-06-20 13:18:19 +0000 | |
---|---|---|
committer | 2019-06-20 13:18:19 +0000 | |
commit | 3f1c635632219903ee6e32286ec5f0a2de5127b5 (patch) | |
tree | e972bb7498bfbdf91f45e88ea8a6577dc0c87b87 | |
parent | tweaks with help from jmc@ (diff) | |
download | wireguard-openbsd-3f1c635632219903ee6e32286ec5f0a2de5127b5.tar.xz wireguard-openbsd-3f1c635632219903ee6e32286ec5f0a2de5127b5.zip |
Change nexthop_update to run the list walk over all prefixes to run
asynchronously and therefor other tasks can make progress at the same
time. Additionally prefixes belonging to a RIB which does not run the
the decision process are no longer linked into the nexthop list.
This replaces the early return in prefix_updateall() and reduces the
time spent in nexthop_update().
OK benno@
-rw-r--r-- | usr.sbin/bgpd/rde.c | 12 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde.h | 10 | ||||
-rw-r--r-- | usr.sbin/bgpd/rde_rib.c | 158 |
3 files changed, 123 insertions, 57 deletions
diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index 58588a93127..a3c30b56a5c 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.470 2019/06/17 13:35:43 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.471 2019/06/20 13:18:19 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org> @@ -256,9 +256,6 @@ rde_main(int debug, int verbose) set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se); set_pollfd(&pfd[PFD_PIPE_SESSION_CTL], ibuf_se_ctl); - if (rib_dump_pending() || rde_update_queue_pending()) - timeout = 0; - i = PFD_PIPE_COUNT; for (mctx = LIST_FIRST(&rde_mrts); mctx != 0; mctx = xmctx) { xmctx = LIST_NEXT(mctx, entry); @@ -275,6 +272,10 @@ rde_main(int debug, int verbose) } } + if (rib_dump_pending() || rde_update_queue_pending() || + nexthop_pending()) + timeout = 0; + if (poll(pfd, i, timeout) == -1) { if (errno != EINTR) fatal("poll error"); @@ -311,12 +312,13 @@ rde_main(int debug, int verbose) mctx = LIST_NEXT(mctx, entry); } + rib_dump_runner(); + nexthop_runner(); if (ibuf_se && ibuf_se->w.queued < SESS_MSG_HIGH_MARK) { rde_update_queue_runner(); for (aid = AID_INET6; aid < AID_MAX; aid++) rde_update6_queue_runner(aid); } - rib_dump_runner(); } /* do not clean up on shutdown on production, it takes ages. */ diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 36fff8e2bf3..834acfbab9a 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.214 2019/06/17 13:35:43 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.215 2019/06/20 13:18:19 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and @@ -228,12 +228,15 @@ struct rde_aspath { enum nexthop_state { NEXTHOP_LOOKUP, NEXTHOP_UNREACH, - NEXTHOP_REACH + NEXTHOP_REACH, + NEXTHOP_FLAPPED }; struct nexthop { LIST_ENTRY(nexthop) nexthop_l; + TAILQ_ENTRY(nexthop) runner_l; struct prefix_list prefix_h; + struct prefix *next_prefix; struct bgpd_addr exit_nexthop; struct bgpd_addr true_nexthop; struct bgpd_addr nexthop_net; @@ -246,6 +249,7 @@ struct nexthop { #endif int refcnt; enum nexthop_state state; + enum nexthop_state oldstate; u_int8_t nexthop_netlen; u_int8_t flags; #define NEXTHOP_CONNECTED 0x01 @@ -593,6 +597,8 @@ prefix_vstate(struct prefix *p) void nexthop_init(u_int32_t); void nexthop_shutdown(void); +int nexthop_pending(void); +void nexthop_runner(void); void nexthop_modify(struct nexthop *, enum action_types, u_int8_t, struct nexthop **, u_int8_t *); void nexthop_link(struct prefix *); diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index 03f4f353414..fe0da884421 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.191 2019/06/17 11:02:20 claudio Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.192 2019/06/20 13:18:19 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> @@ -517,13 +517,15 @@ path_empty(struct rde_aspath *asp) return (asp == NULL || asp->refcnt <= 0); } -static inline void +static inline struct rde_aspath * path_ref(struct rde_aspath *asp) { if ((asp->flags & F_ATTR_LINKED) == 0) fatalx("%s: unlinked object", __func__); asp->refcnt++; rdemem.path_refs++; + + return asp; } static inline void @@ -955,19 +957,16 @@ prefix_move(struct prefix *p, struct rde_peer *peer, fatalx("prefix_move: cross peer move"); np = prefix_alloc(); - np->aspath = asp; - np->communities = comm; - np->nexthop = nexthop_ref(nexthop); - nexthop_link(np); + /* add reference to new AS path and communities */ + np->aspath = path_ref(asp); + np->communities = communities_get(comm); np->peer = peer; np->re = p->re; - np->lastchange = time(NULL); - np->nhflags = nhflags; np->validation_state = vstate; - - /* add reference to new AS path and communities */ - path_ref(asp); - communities_get(comm); + np->nhflags = nhflags; + np->nexthop = nexthop_ref(nexthop); + nexthop_link(np); + np->lastchange = time(NULL); /* * no need to update the peer prefix count because we are only moving @@ -988,18 +987,18 @@ prefix_move(struct prefix *p, struct rde_peer *peer, /* remove old prefix node */ /* as before peer count needs no update because of move */ oasp = p->aspath; - if (oasp) - path_unref(oasp); /* destroy all references to other objects and free the old prefix */ - p->aspath = NULL; + nexthop_unlink(p); + nexthop_put(p->nexthop); communities_put(p->communities); + if (oasp) + path_unref(oasp); p->communities = NULL; + p->nexthop = NULL; + p->aspath = NULL; p->peer = NULL; p->re = NULL; - nexthop_unlink(p); - nexthop_put(p->nexthop); - p->nexthop = NULL; prefix_free(p); /* destroy old path if empty */ @@ -1252,19 +1251,23 @@ prefix_updateall(struct prefix *p, enum nexthop_state state, enum nexthop_state oldstate) { /* Skip non local-RIBs or RIBs that are flagged as noeval. */ - if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) + if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) { + log_warnx("%s: prefix with F_RIB_NOEVALUATE hit", __func__); return; + } - if (oldstate == state && state == NEXTHOP_REACH) { + if (oldstate == state) { /* * The state of the nexthop did not change. The only * thing that may have changed is the true_nexthop * or other internal infos. This will not change * the routing decision so shortcut here. */ - if ((re_rib(p->re)->flags & F_RIB_NOFIB) == 0 && - p == p->re->active) - rde_send_kroute(re_rib(p->re), p, NULL); + if (state == NEXTHOP_REACH) { + if ((re_rib(p->re)->flags & F_RIB_NOFIB) == 0 && + p == p->re->active) + rde_send_kroute(re_rib(p->re), p, NULL); + } return; } @@ -1305,17 +1308,15 @@ prefix_link(struct prefix *p, struct rib_entry *re, struct rde_peer *peer, struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nexthop, u_int8_t nhflags, u_int8_t vstate) { - path_ref(asp); - - p->aspath = asp; - p->peer = peer; + p->aspath = path_ref(asp); p->communities = communities_get(comm); + p->peer = peer; + p->re = re; + p->validation_state = vstate; + p->nhflags = nhflags; p->nexthop = nexthop_ref(nexthop); nexthop_link(p); - p->re = re; p->lastchange = time(NULL); - p->nhflags = nhflags; - p->validation_state = vstate; /* make route decision */ prefix_evaluate(p, re); @@ -1336,22 +1337,21 @@ prefix_unlink(struct prefix *p) LIST_REMOVE(p, rib_l); prefix_evaluate(NULL, re); - if (p->aspath) - path_unref(p->aspath); - - if (rib_empty(re)) - rib_remove(re); - /* destroy all references to other objects */ - communities_put(p->communities); nexthop_unlink(p); nexthop_put(p->nexthop); + communities_put(p->communities); + if (p->aspath) + path_unref(p->aspath); p->communities = NULL; p->nexthop = NULL; p->aspath = NULL; p->peer = NULL; p->re = NULL; + if (rib_empty(re)) + rib_remove(re); + /* * It's the caller's duty to do accounting and remove empty aspath * structures. Also freeing the unlinked prefix is the caller's duty. @@ -1401,6 +1401,8 @@ struct nexthop_table { SIPHASH_KEY nexthoptablekey; +TAILQ_HEAD(nexthop_queue, nexthop) nexthop_runners; + void nexthop_init(u_int32_t hashsize) { @@ -1412,6 +1414,7 @@ nexthop_init(u_int32_t hashsize) if (nexthoptable.nexthop_hashtbl == NULL) fatal("nextop_init"); + TAILQ_INIT(&nexthop_runners); for (i = 0; i < hs; i++) LIST_INIT(&nexthoptable.nexthop_hashtbl[i]); arc4random_buf(&nexthoptablekey, sizeof(nexthoptablekey)); @@ -1443,12 +1446,45 @@ nexthop_shutdown(void) free(nexthoptable.nexthop_hashtbl); } +int +nexthop_pending(void) +{ + return !TAILQ_EMPTY(&nexthop_runners); +} + +void +nexthop_runner(void) +{ + struct nexthop *nh; + struct prefix *p; + u_int32_t j; + + nh = TAILQ_FIRST(&nexthop_runners); + if (nh == NULL) + return; + + /* remove from runnner queue */ + TAILQ_REMOVE(&nexthop_runners, nh, runner_l); + + p = nh->next_prefix; + for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) { + prefix_updateall(p, nh->state, nh->oldstate); + p = LIST_NEXT(p, nexthop_l); + } + + /* prep for next run, if not finished readd to tail of queue */ + nh->next_prefix = p; + if (p != NULL) + TAILQ_INSERT_TAIL(&nexthop_runners, nh, runner_l); + else + log_debug("nexthop %s update finished", + log_addr(&nh->exit_nexthop)); +} + void nexthop_update(struct kroute_nexthop *msg) { struct nexthop *nh; - struct prefix *p; - enum nexthop_state oldstate; nh = nexthop_lookup(&msg->nexthop); if (nh == NULL) { @@ -1457,7 +1493,20 @@ nexthop_update(struct kroute_nexthop *msg) return; } - oldstate = nh->state; + nh->oldstate = nh->state; + if (nh->oldstate == NEXTHOP_LOOKUP) + /* drop reference which was hold during the lookup */ + if (nexthop_put(nh)) + return; /* nh lost last ref, no work left */ + + if (nh->next_prefix) { + /* + * If nexthop_runner() is not finished with this nexthop + * then ensure that all prefixes are updated by setting + * the oldstate to NEXTHOP_FLAPPED. + */ + nh->oldstate = NEXTHOP_FLAPPED; + } if (msg->valid) nh->state = NEXTHOP_REACH; @@ -1476,21 +1525,16 @@ nexthop_update(struct kroute_nexthop *msg) sizeof(nh->nexthop_net)); nh->nexthop_netlen = msg->netlen; - if (oldstate == NEXTHOP_LOOKUP) - /* drop reference which was hold during the lookup */ - if (nexthop_put(nh)) - return; - if (rde_noevaluate()) /* * if the decision process is turned off there is no need - * for the aspath list walk. + * for the prefix list walk. */ return; - LIST_FOREACH(p, &nh->prefix_h, nexthop_l) { - prefix_updateall(p, nh->state, oldstate); - } + nh->next_prefix = LIST_FIRST(&nh->prefix_h); + TAILQ_INSERT_HEAD(&nexthop_runners, nh, runner_l); + log_debug("nexthop %s update starting", log_addr(&nh->exit_nexthop)); } void @@ -1532,6 +1576,10 @@ nexthop_link(struct prefix *p) if (p->nexthop == NULL) return; + /* no need to link prefixes in RIBs that have no decision process */ + if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) + return; + LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, nexthop_l); } @@ -1541,6 +1589,12 @@ nexthop_unlink(struct prefix *p) if (p->nexthop == NULL) return; + if (re_rib(p->re)->flags & F_RIB_NOEVALUATE) + return; + + if (p == p->nexthop->next_prefix) + p->nexthop->next_prefix = LIST_NEXT(p, nexthop_l); + LIST_REMOVE(p, nexthop_l); } @@ -1588,7 +1642,11 @@ nexthop_put(struct nexthop *nh) /* sanity check */ if (!LIST_EMPTY(&nh->prefix_h) || nh->state == NEXTHOP_LOOKUP) - fatalx("nexthop_put: refcnt error"); + fatalx("%s: refcnt error", __func__); + + /* is nexthop update running? impossible, that is a refcnt error */ + if (nh->next_prefix) + fatalx("%s: next_prefix not NULL", __func__); LIST_REMOVE(nh, nexthop_l); rde_send_nexthop(&nh->exit_nexthop, 0); |