BGP: implement Adj-RIB-In

The patch adds an optional internal import table to a channel and
hooks it into BGP so that it can be used as an Adj-RIB-In. When
enabled, all received routes (before import filtering) are stored
there and import filters can be re-evaluated without an explicit
route refresh. An import table can be examined with e.g.
'show route import table bgp1.ipv4'.
Author: Ondrej Zajicek (work)
Date:   2018-09-27 22:57:55 +02:00
Commit: 682d3f7de0 (parent 01fd00f5ed)

9 changed files with 235 additions and 8 deletions
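
As a usage sketch (not part of the patch), the option is enabled per
BGP channel; the protocol name, neighbor address, AS numbers and
filter name below are made up for illustration:

  protocol bgp bgp1 {
    local as 65000;
    neighbor 192.0.2.1 as 65001;
    ipv4 {
      import table on;          # keep received (pre-filter) routes as Adj-RIB-In
      import filter my_filter;
    };
  }

The stored routes can then be inspected with 'show route import table
bgp1.ipv4', and 'reload in bgp1' re-evaluates the import filter from
the import table instead of requesting a route refresh from the peer.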


@@ -558,6 +558,16 @@ r_args:
rt_show_add_table($$, t->table);
$$->tables_defined_by = RSD_TDB_ALL;
}
| r_args IMPORT TABLE SYM '.' r_args_channel {
$$ = $1;
struct proto_config *cf = (void *) $4->def;
if ($4->class != SYM_PROTO || !cf->proto) cf_error("%s is not a protocol", $4->name);
struct channel *c = proto_find_channel_by_name(cf->proto, $6);
if (!c) cf_error("Channel %s.%s not found", $4->name, $6);
if (!c->in_table) cf_error("No import table in channel %s.%s", $4->name, $6);
rt_show_add_table($$, c->in_table);
$$->tables_defined_by = RSD_TDB_DIRECT;
}
| r_args FILTER filter {
$$ = $1;
if ($$->filter != FILTER_ACCEPT) cf_error("Filter specified twice");


@@ -284,6 +284,54 @@ channel_stop_export(struct channel *c)
c->stats.exp_routes = 0;
}
/* Called by protocol for reload from in_table */
void
channel_schedule_reload(struct channel *c)
{
ASSERT(c->channel_state == CS_UP);
rt_reload_channel_abort(c);
ev_schedule(c->reload_event);
}
static void
channel_reload_loop(void *ptr)
{
struct channel *c = ptr;
if (!rt_reload_channel(c))
{
ev_schedule(c->reload_event);
return;
}
}
static void
channel_reset_import(struct channel *c)
{
/* Need to abort feeding */
ev_postpone(c->reload_event);
rt_reload_channel_abort(c);
rt_prune_sync(c->in_table, 1);
}
/* Called by protocol to activate in_table */
void
channel_setup_in_table(struct channel *c)
{
struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config));
cf->name = "import";
cf->addr_type = c->net_type;
c->in_table = mb_allocz(c->proto->pool, sizeof(struct rtable));
rt_setup(c->proto->pool, c->in_table, cf);
c->reload_event = ev_new_init(c->proto->pool, channel_reload_loop, c);
}
static void
channel_do_start(struct channel *c)
{
@@ -315,6 +363,8 @@ channel_do_flush(struct channel *c)
static void
channel_do_down(struct channel *c)
{
ASSERT(!c->feed_active && !c->reload_active);
rem_node(&c->table_node);
rt_unlock_table(c->table);
c->proto->active_channels--;
@@ -324,6 +374,9 @@ channel_do_down(struct channel *c)
memset(&c->stats, 0, sizeof(struct proto_stats));
c->in_table = NULL;
c->reload_event = NULL;
CALL(c->channel->cleanup, c);
/* Schedule protocol shutdown */
@@ -355,6 +408,9 @@ channel_set_state(struct channel *c, uint state)
if (es != ES_DOWN)
channel_stop_export(c);
if (c->in_table && (cs == CS_UP))
channel_reset_import(c);
break;
case CS_UP:
@@ -374,6 +430,9 @@ channel_set_state(struct channel *c, uint state)
if (es != ES_DOWN)
channel_stop_export(c);
if (c->in_table && (cs == CS_UP))
channel_reset_import(c);
channel_do_flush(c);
break;


@@ -306,8 +306,6 @@ rte_make_tmp_attrs(struct rte **rt, struct linpool *pool)
(*rt)->attrs->eattrs = ea;
}
/* Moved from route.h to avoid dependency conflicts */
static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); }
extern pool *proto_pool;
extern list proto_list;
@@ -539,6 +537,11 @@ struct channel {
btime last_state_change; /* Time of last state transition */
btime last_tx_filter_change;
struct rtable *in_table; /* Internal table for received routes */
struct event *reload_event; /* Event responsible for reloading from in_table */
struct fib_iterator reload_fit; /* Iterator in in_table used during reloading */
u8 reload_active; /* Iterator reload_fit is linked */
};
@@ -606,6 +609,8 @@ struct channel *proto_add_channel(struct proto *p, struct channel_config *cf);
int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf);
void channel_set_state(struct channel *c, uint state);
void channel_setup_in_table(struct channel *c);
void channel_schedule_reload(struct channel *c);
static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); }
static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); }
@@ -617,4 +622,17 @@ void *channel_config_get(const struct channel_class *cc, const char *name, uint
int channel_reconfigure(struct channel *c, struct channel_config *cf);
/* Moved from route.h to avoid dependency conflicts */
static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); }
static inline void
rte_update3(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
if (c->in_table && !rte_update_in(c, n, new, src))
return;
rte_update2(c, n, new, src);
}
#endif


@@ -311,8 +311,13 @@ void rt_dump(rtable *);
void rt_dump_all(void);
int rt_feed_channel(struct channel *c);
void rt_feed_channel_abort(struct channel *c);
int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src);
int rt_reload_channel(struct channel *c);
void rt_reload_channel_abort(struct channel *c);
void rt_prune_sync(rtable *t, int all);
struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
/* Default limit for ECMP next hops, defined in sysdep code */
extern const int rt_default_ecmp;


@@ -2288,6 +2288,127 @@ rt_feed_channel_abort(struct channel *c)
}
}
int
rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
rte *old, **pos;
net *net;
if (new)
{
net = net_get(c->in_table, n);
if (!new->pref)
new->pref = c->preference;
if (!rta_is_cached(new->attrs))
new->attrs = rta_lookup(new->attrs);
}
else
{
net = net_find(c->in_table, n);
if (!net)
return 0;
}
/* Find the old rte */
for (pos = &net->routes; old = *pos; pos = &old->next)
if (old->attrs->src == src)
{
if (new && rte_same(old, new))
return 0;
/* Remove the old rte */
*pos = old->next;
rte_free_quick(old);
break;
}
if (!new)
return !!old;
/* Insert the new rte */
rte *e = rte_do_cow(new);
e->flags |= REF_COW;
e->net = net;
e->sender = c;
e->lastmod = current_time();
e->next = *pos;
*pos = e;
return 1;
}
int
rt_reload_channel(struct channel *c)
{
struct rtable *tab = c->in_table;
struct fib_iterator *fit = &c->reload_fit;
int max_feed = 64;
ASSERT(c->channel_state == CS_UP);
if (!c->reload_active)
{
FIB_ITERATE_INIT(fit, &tab->fib);
c->reload_active = 1;
}
FIB_ITERATE_START(&tab->fib, fit, net, n)
{
if (max_feed <= 0)
{
FIB_ITERATE_PUT(fit);
return 0;
}
for (rte *e = n->routes; e; e = e->next)
{
rte_update2(c, n->n.addr, rte_do_cow(e), e->attrs->src);
max_feed--;
}
}
FIB_ITERATE_END;
c->reload_active = 0;
return 1;
}
void
rt_reload_channel_abort(struct channel *c)
{
if (c->reload_active)
{
/* Unlink the iterator */
fit_get(&c->in_table->fib, &c->reload_fit);
c->reload_active = 0;
}
}
void
rt_prune_sync(rtable *t, int all)
{
FIB_WALK(&t->fib, net, n)
{
rte *e, **ee = &n->routes;
while (e = *ee)
{
if (all || (e->flags & (REF_STALE | REF_DISCARD)))
{
*ee = e->next;
rte_free_quick(e);
}
else
ee = &e->next;
}
}
FIB_WALK_END;
}
static inline u32
hc_hash(ip_addr a, rtable *dep)
{


@@ -545,7 +545,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
int active = loc->ready && rem->ready;
c->c.disabled = !active;
c->c.reloadable = p->route_refresh;
c->c.reloadable = p->route_refresh || c->cf->import_table;
c->index = active ? num++ : 0;
@@ -838,6 +838,9 @@ bgp_refresh_begin(struct bgp_channel *c)
c->load_state = BFS_REFRESHING;
rt_refresh_begin(c->c.table, &c->c);
if (c->c.in_table)
rt_refresh_begin(c->c.in_table, &c->c);
}
/**
@@ -859,6 +862,9 @@ bgp_refresh_end(struct bgp_channel *c)
c->load_state = BFS_NONE;
rt_refresh_end(c->c.table, &c->c);
if (c->c.in_table)
rt_prune_sync(c->c.in_table, 0);
}
@@ -1296,8 +1302,11 @@ bgp_reload_routes(struct channel *C)
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
ASSERT(p->conn && p->route_refresh);
ASSERT(p->conn && (p->route_refresh || c->c.in_table));
if (c->c.in_table)
channel_schedule_reload(C);
else
bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
}
@@ -1598,6 +1607,9 @@ bgp_channel_start(struct channel *C)
bgp_init_bucket_table(c);
bgp_init_prefix_table(c);
if (c->cf->import_table)
channel_setup_in_table(C);
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
c->next_hop_addr = c->cf->next_hop_addr;


@@ -144,6 +144,7 @@ struct bgp_channel_config {
uint llgr_time; /* Long-lived graceful restart stale time */
u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */
u8 add_path; /* Use ADD-PATH extension [RFC 7911] */
u8 import_table; /* Use c.in_table as Adj-RIB-In */
uint rest[0]; /* Remaining items are reconfigured separately */
struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */


@@ -29,7 +29,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE)
LIVED, STALE, IMPORT)
%type <i32> bgp_afi
@@ -229,6 +229,7 @@ bgp_channel_item:
| ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; }
| ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; }
| ADD PATHS bool { BGP_CC->add_path = $3 ? BGP_ADD_PATH_FULL : 0; }
| IMPORT TABLE bool { BGP_CC->import_table = $3; }
| IGP TABLE rtable {
if (BGP_CC->desc->no_igp)
cf_error("IGP table not allowed here");


@@ -1159,7 +1159,7 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
if (!a0)
{
/* Route withdraw */
rte_update2(&s->channel->c, n, NULL, s->last_src);
rte_update3(&s->channel->c, n, NULL, s->last_src);
return;
}
@@ -1180,7 +1180,7 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
e->pflags = 0;
e->u.bgp.suppressed = 0;
e->u.bgp.stale = -1;
rte_update2(&s->channel->c, n, e, s->last_src);
rte_update3(&s->channel->c, n, e, s->last_src);
}
static void
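
For orientation, a minimal sketch of how a protocol could wire the new
channel API together, modeled on the BGP hooks above; the example_*
function names are illustrative, not part of the patch:

  /* Sketch only: using the in_table API introduced by this patch */
  #include "nest/protocol.h"
  #include "nest/route.h"

  /* At channel start, when the user enabled the internal import table */
  static void
  example_enable_adj_rib_in(struct channel *C)
  {
    channel_setup_in_table(C);	/* Allocates C->in_table and C->reload_event */
  }

  /* For every received route; new == NULL means a withdrawal */
  static void
  example_receive_route(struct channel *C, const net_addr *n, rte *new,
			struct rte_src *src)
  {
    /* Store the route in C->in_table first; if nothing changed there,
       the regular import via rte_update2() is skipped */
    rte_update3(C, n, new, src);
  }

  /* In the protocol's reload_routes hook: re-run import filters from
     the stored routes instead of asking the peer to resend them */
  static void
  example_reload(struct channel *C)
  {
    channel_schedule_reload(C);
  }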