From 682d3f7de0905ca2e853844734cce7ff65f7d77d Mon Sep 17 00:00:00 2001
From: "Ondrej Zajicek (work)"
Date: Thu, 27 Sep 2018 22:57:55 +0200
Subject: [PATCH] BGP: implement Adj-RIB-In

The patch implements an optional internal import table for a channel and
hooks it to BGP so it can be used as Adj-RIB-In. When enabled, all
received (pre-filtered) routes are stored there and import filters can
be re-evaluated without explicit route refresh. An import table can be
examined using e.g. 'show route import table bgp1.ipv4'.
---
Review notes (not part of the commit message):
 - rte_update_in() now frees a duplicate rte instead of leaking it, because
   rte_update3() skips rte_update2() when rte_update_in() returns 0.
 - rte_update_in() now clears REF_STALE / REF_DISCARD on an unchanged route
   that is re-announced, so rt_prune_sync(in_table, 0) in bgp_refresh_end()
   does not throw it away, and passes the update on to the main table.
 - The new functions in nest/rt-table.c got header comments.

 nest/config.Y       |  10 ++++
 nest/proto.c        |  59 +++++++++++++++++++++
 nest/protocol.h     |  22 +++++++-
 nest/route.h        |   5 ++
 nest/rt-table.c     | 153 ++++++++++++++++++++++++++++++++++++++++++++
 proto/bgp/bgp.c     |  18 +++++--
 proto/bgp/bgp.h     |   1 +
 proto/bgp/config.Y  |   3 +-
 proto/bgp/packets.c |   4 +-
 9 files changed, 267 insertions(+), 8 deletions(-)

diff --git a/nest/config.Y b/nest/config.Y
index 34bde3fa..aef5ed46 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -558,6 +558,16 @@ r_args:
      rt_show_add_table($$, t->table);
      $$->tables_defined_by = RSD_TDB_ALL;
    }
+ | r_args IMPORT TABLE SYM '.' r_args_channel {
+     $$ = $1;
+     struct proto_config *cf = (void *) $4->def;
+     if ($4->class != SYM_PROTO || !cf->proto) cf_error("%s is not a protocol", $4->name);
+     struct channel *c = proto_find_channel_by_name(cf->proto, $6);
+     if (!c) cf_error("Channel %s.%s not found", $4->name, $6);
+     if (!c->in_table) cf_error("No import table in channel %s.%s", $4->name, $6);
+     rt_show_add_table($$, c->in_table);
+     $$->tables_defined_by = RSD_TDB_DIRECT;
+   }
  | r_args FILTER filter {
      $$ = $1;
      if ($$->filter != FILTER_ACCEPT) cf_error("Filter specified twice");
diff --git a/nest/proto.c b/nest/proto.c
index a6aa4e5c..7849b604 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -284,6 +284,54 @@ channel_stop_export(struct channel *c)
   c->stats.exp_routes = 0;
 }
 
+
+/* Called by protocol for reload from in_table */
+void
+channel_schedule_reload(struct channel *c)
+{
+  ASSERT(c->channel_state == CS_UP);
+
+  rt_reload_channel_abort(c);
+  ev_schedule(c->reload_event);
+}
+
+static void
+channel_reload_loop(void *ptr)
+{
+  struct channel *c = ptr;
+
+  if (!rt_reload_channel(c))
+  {
+    ev_schedule(c->reload_event);
+    return;
+  }
+}
+
+static void
+channel_reset_import(struct channel *c)
+{
+  /* Need to abort feeding */
+  ev_postpone(c->reload_event);
+  rt_reload_channel_abort(c);
+
+  rt_prune_sync(c->in_table, 1);
+}
+
+/* Called by protocol to activate in_table */
+void
+channel_setup_in_table(struct channel *c)
+{
+  struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config));
+  cf->name = "import";
+  cf->addr_type = c->net_type;
+
+  c->in_table = mb_allocz(c->proto->pool, sizeof(struct rtable));
+  rt_setup(c->proto->pool, c->in_table, cf);
+
+  c->reload_event = ev_new_init(c->proto->pool, channel_reload_loop, c);
+}
+
+
 static void
 channel_do_start(struct channel *c)
 {
@@ -315,6 +363,8 @@ channel_do_flush(struct channel *c)
 static void
 channel_do_down(struct channel *c)
 {
+  ASSERT(!c->feed_active && !c->reload_active);
+
   rem_node(&c->table_node);
   rt_unlock_table(c->table);
   c->proto->active_channels--;
@@ -324,6 +374,9 @@ channel_do_down(struct channel *c)
 
   memset(&c->stats, 0, sizeof(struct proto_stats));
 
+  c->in_table = NULL;
+  c->reload_event = NULL;
+
   CALL(c->channel->cleanup, c);
 
   /* Schedule protocol shutddown */
@@ -355,6 +408,9 @@ channel_set_state(struct channel *c, uint state)
     if (es != ES_DOWN)
       channel_stop_export(c);
 
+    if (c->in_table && (cs == CS_UP))
+      channel_reset_import(c);
+
     break;
 
   case CS_UP:
@@ -374,6 +430,9 @@ channel_set_state(struct channel *c, uint state)
     if (es != ES_DOWN)
       channel_stop_export(c);
 
+    if (c->in_table && (cs == CS_UP))
+      channel_reset_import(c);
+
     channel_do_flush(c);
     break;
 
diff --git a/nest/protocol.h b/nest/protocol.h
index 3008087b..aa836f38 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -306,8 +306,6 @@ rte_make_tmp_attrs(struct rte **rt, struct linpool *pool)
   (*rt)->attrs->eattrs = ea;
 }
 
-/* Moved from route.h to avoid dependency conflicts */
-static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); }
 
 extern pool *proto_pool;
 extern list proto_list;
@@ -539,6 +537,11 @@ struct channel {
 
   btime last_state_change;		/* Time of last state transition */
   btime last_tx_filter_change;
+
+  struct rtable *in_table;		/* Internal table for received routes */
+  struct event *reload_event;		/* Event responsible for reloading from in_table */
+  struct fib_iterator reload_fit;	/* Iterator in in_table used during reloading */
+  u8 reload_active;			/* Iterator reload_fit is linked */
 };
 
 
@@ -606,6 +609,8 @@ struct channel *proto_add_channel(struct proto *p, struct channel_config *cf);
 int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf);
 
 void channel_set_state(struct channel *c, uint state);
+void channel_setup_in_table(struct channel *c);
+void channel_schedule_reload(struct channel *c);
 
 static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); }
 static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); }
@@ -617,4 +622,17 @@ void *channel_config_get(const struct channel_class *cc, const char *name, uint
 int channel_reconfigure(struct channel *c, struct channel_config *cf);
 
 
+/* Moved from route.h to avoid dependency conflicts */
+static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); }
+
+static inline void
+rte_update3(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
+{
+  if (c->in_table && !rte_update_in(c, n, new, src))
+    return;
+
+  rte_update2(c, n, new, src);
+}
+
+
 #endif
diff --git a/nest/route.h b/nest/route.h
index 2600f087..7a683f9e 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -311,8 +311,13 @@ void rt_dump(rtable *);
 void rt_dump_all(void);
 int rt_feed_channel(struct channel *c);
 void rt_feed_channel_abort(struct channel *c);
+int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src);
+int rt_reload_channel(struct channel *c);
+void rt_reload_channel_abort(struct channel *c);
+void rt_prune_sync(rtable *t, int all);
 struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
 
+
 /* Default limit for ECMP next hops, defined in sysdep code */
 extern const int rt_default_ecmp;
 
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 21b6622e..5beb1be9 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -2288,6 +2288,159 @@ rt_feed_channel_abort(struct channel *c)
   }
 }
 
+
+/*
+ * rte_update_in - store a received route in the import table of a channel
+ *
+ * A NULL @new is a withdraw of the route from source @src. Returns 1 if
+ * the caller should continue with rte_update2(), 0 if the update is to be
+ * dropped. rte_update3() skips rte_update2() on 0, so a dropped @new is
+ * freed here.
+ */
+int
+rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
+{
+  rte *old, **pos;
+  net *net;
+
+  if (new)
+  {
+    net = net_get(c->in_table, n);
+
+    if (!new->pref)
+      new->pref = c->preference;
+
+    if (!rta_is_cached(new->attrs))
+      new->attrs = rta_lookup(new->attrs);
+  }
+  else
+  {
+    net = net_find(c->in_table, n);
+
+    if (!net)
+      return 0;
+  }
+
+  /* Find the old rte */
+  for (pos = &net->routes; old = *pos; pos = &old->next)
+    if (old->attrs->src == src)
+    {
+      if (new && rte_same(old, new))
+      {
+	/* Re-announced while flagged for pruning: keep it, update main table */
+	if (old->flags & (REF_STALE | REF_DISCARD))
+	{
+	  old->flags &= ~(REF_STALE | REF_DISCARD);
+	  return 1;
+	}
+
+	/* Plain duplicate: drop it and release the new rte */
+	rte_free(new);
+	return 0;
+      }
+
+      /* Remove the old rte */
+      *pos = old->next;
+      rte_free_quick(old);
+
+      break;
+    }
+
+  if (!new)
+    return !!old;
+
+  /* Insert the new rte */
+  rte *e = rte_do_cow(new);
+  e->flags |= REF_COW;
+  e->net = net;
+  e->sender = c;
+  e->lastmod = current_time();
+  e->next = *pos;
+  *pos = e;
+
+  return 1;
+}
+
+/*
+ * rt_reload_channel - re-feed routes stored in in_table through rte_update2()
+ *
+ * Stops between nets once 64 routes were fed. Returns 0 when interrupted (the
+ * position is kept in reload_fit, call again to continue), 1 when finished.
+ */
+int
+rt_reload_channel(struct channel *c)
+{
+  struct rtable *tab = c->in_table;
+  struct fib_iterator *fit = &c->reload_fit;
+  int max_feed = 64;
+
+  ASSERT(c->channel_state == CS_UP);
+
+  if (!c->reload_active)
+  {
+    FIB_ITERATE_INIT(fit, &tab->fib);
+    c->reload_active = 1;
+  }
+
+  FIB_ITERATE_START(&tab->fib, fit, net, n)
+  {
+    if (max_feed <= 0)
+    {
+      FIB_ITERATE_PUT(fit);
+      return 0;
+    }
+
+    for (rte *e = n->routes; e; e = e->next)
+    {
+      rte_update2(c, n->n.addr, rte_do_cow(e), e->attrs->src);
+      max_feed--;
+    }
+  }
+  FIB_ITERATE_END;
+
+  c->reload_active = 0;
+  return 1;
+}
+
+/* Abort a reload in progress (if any) by unlinking its iterator */
+void
+rt_reload_channel_abort(struct channel *c)
+{
+  if (c->reload_active)
+  {
+    /* Unlink the iterator */
+    fit_get(&c->in_table->fib, &c->reload_fit);
+    c->reload_active = 0;
+  }
+}
+
+/*
+ * rt_prune_sync - immediately free routes from table @t
+ *
+ * Frees all routes if @all is set, otherwise just those flagged REF_STALE
+ * or REF_DISCARD.
+ */
+void
+rt_prune_sync(rtable *t, int all)
+{
+  FIB_WALK(&t->fib, net, n)
+  {
+    rte *e, **ee = &n->routes;
+    while (e = *ee)
+    {
+      if (all || (e->flags & (REF_STALE | REF_DISCARD)))
+      {
+	*ee = e->next;
+	rte_free_quick(e);
+      }
+      else
+	ee = &e->next;
+    }
+  }
+  FIB_WALK_END;
+}
+
+
 static inline u32
 hc_hash(ip_addr a, rtable *dep)
 {
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index e20097ae..b2dbd780 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -545,7 +545,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
 
     int active = loc->ready && rem->ready;
     c->c.disabled = !active;
-    c->c.reloadable = p->route_refresh;
+    c->c.reloadable = p->route_refresh || c->cf->import_table;
 
     c->index = active ? num++ : 0;
 
@@ -838,6 +838,9 @@ bgp_refresh_begin(struct bgp_channel *c)
 
   c->load_state = BFS_REFRESHING;
   rt_refresh_begin(c->c.table, &c->c);
+
+  if (c->c.in_table)
+    rt_refresh_begin(c->c.in_table, &c->c);
 }
 
 /**
@@ -859,6 +862,9 @@ bgp_refresh_end(struct bgp_channel *c)
 
   c->load_state = BFS_NONE;
   rt_refresh_end(c->c.table, &c->c);
+
+  if (c->c.in_table)
+    rt_prune_sync(c->c.in_table, 0);
 }
 
 
@@ -1296,9 +1302,12 @@ bgp_reload_routes(struct channel *C)
   struct bgp_proto *p = (void *) C->proto;
   struct bgp_channel *c = (void *) C;
 
-  ASSERT(p->conn && p->route_refresh);
+  ASSERT(p->conn && (p->route_refresh || c->c.in_table));
 
-  bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
+  if (c->c.in_table)
+    channel_schedule_reload(C);
+  else
+    bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
 }
 
 static void
@@ -1598,6 +1607,9 @@ bgp_channel_start(struct channel *C)
   bgp_init_bucket_table(c);
   bgp_init_prefix_table(c);
 
+  if (c->cf->import_table)
+    channel_setup_in_table(C);
+
   c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
 
   c->next_hop_addr = c->cf->next_hop_addr;
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index e1ff013a..76b835fa 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -144,6 +144,7 @@ struct bgp_channel_config {
   uint llgr_time;			/* Long-lived graceful restart stale time */
   u8 ext_next_hop;			/* Allow both IPv4 and IPv6 next hops */
   u8 add_path;				/* Use ADD-PATH extension [RFC 7911] */
+  u8 import_table;			/* Use c.in_table as Adj-RIB-In */
 
   uint rest[0];				/* Remaining items are reconfigured separately */
   struct rtable_config *igp_table_ip4;	/* Table for recursive IPv4 next hop lookups */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 120b1e88..f155eee2 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -29,7 +29,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
 	SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
 	GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
 	STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
-	LIVED, STALE)
+	LIVED, STALE, IMPORT)
 
 %type bgp_afi
 
@@ -229,6 +229,7 @@ bgp_channel_item:
  | ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; }
  | ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; }
  | ADD PATHS bool { BGP_CC->add_path = $3 ? BGP_ADD_PATH_FULL : 0; }
+ | IMPORT TABLE bool { BGP_CC->import_table = $3; }
  | IGP TABLE rtable {
     if (BGP_CC->desc->no_igp)
       cf_error("IGP table not allowed here");
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 3be48c00..c2261870 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -1159,7 +1159,7 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
   if (!a0)
   {
     /* Route withdraw */
-    rte_update2(&s->channel->c, n, NULL, s->last_src);
+    rte_update3(&s->channel->c, n, NULL, s->last_src);
     return;
   }
 
@@ -1180,7 +1180,7 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
   e->pflags = 0;
   e->u.bgp.suppressed = 0;
   e->u.bgp.stale = -1;
-  rte_update2(&s->channel->c, n, e, s->last_src);
+  rte_update3(&s->channel->c, n, e, s->last_src);
 }
 
 static void