diff --git a/nest/config.Y b/nest/config.Y index f889828a..2bc5a4ab 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -46,7 +46,7 @@ CF_KEYWORDS(ROUTER, ID, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OF CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, TABLE, STATES, ROUTES, FILTERS) CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES) CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH) -CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION) +CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED) CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC) CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, @@ -64,7 +64,7 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type roa_args %type roa_table_arg %type sym_args -%type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode +%type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode tab_sorted %type proto_patt proto_patt2 CF_GRAMMAR @@ -110,10 +110,17 @@ listen_opt: /* Creation of routing tables */ +tab_sorted: + { $$ = 0; } + | SORTED { $$ = 1; } + ; + CF_ADDTO(conf, newtab) -newtab: TABLE SYM { - rt_new_table($2); +newtab: TABLE SYM tab_sorted { + struct rtable_config *cf; + cf = rt_new_table($2); + cf->sorted = $3; } ; diff --git a/nest/route.h b/nest/route.h index 59daf803..524e69b3 100644 --- a/nest/route.h +++ b/nest/route.h @@ -121,6 +121,7 @@ struct rtable_config { struct proto_config *krt_attached; /* Kernel syncer attached to this table */ int gc_max_ops; /* Maximum number of operations before GC is run */ int gc_min_time; /* Minimum time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ }; typedef struct rtable { diff --git a/nest/rt-table.c b/nest/rt-table.c index 32feafba..eb8304f7 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -423,7 +423,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol if (!new_best) return; - /* Third case, we use r insead of new_best, because export_filter() could change it */ + /* Third case, we use r instead of new_best, because export_filter() could change it */ if (r != new_changed) { if (new_free) @@ -432,7 +432,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol } /* Fourth case */ - for (; r; r=r->next) + for (r=r->next; r; r=r->next) { if (old_best = export_filter(ah, r, &old_free, NULL, 1)) goto found; @@ -642,20 +642,92 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str if (old) stats->imp_routes--; - if (new) + if (table->config->sorted) { - for (k=&net->routes; *k; k=&(*k)->next) - if (rte_better(new, *k)) - break; + /* If routes are sorted, just insert new route to appropriate position */ + if (new) + { + if (before_old && !rte_better(new, before_old)) + k = &before_old->next; + else + k = &net->routes; - new->lastmod = now; - new->next = *k; - *k = new; + for (; *k; k=&(*k)->next) + if (rte_better(new, *k)) + break; - rte_trace_in(D_ROUTES, p, new, net->routes == new ? "added [best]" : "added"); + new->next = *k; + *k = new; + } + } + else + { + /* If routes are not sorted, find the best route and move it on + the first position. There are several optimized cases. */ + + if (src->rte_recalculate && src->rte_recalculate(table, net, new, old, old_best)) + goto do_recalculate; + + if (new && rte_better(new, old_best)) + { + /* The first case - the new route is cleary optimal, + we link it at the first position */ + + new->next = net->routes; + net->routes = new; + } + else if (old == old_best) + { + /* The second case - the old best route disappeared, we add the + new route (if we have any) to the list (we don't care about + position) and then we elect the new optimal route and relink + that route at the first position and announce it. New optimal + route might be NULL if there is no more routes */ + + do_recalculate: + /* Add the new route to the list */ + if (new) + { + new->next = net->routes; + net->routes = new; + } + + /* Find a new optimal route (if there is any) */ + if (net->routes) + { + rte **bp = &net->routes; + for (k=&(*bp)->next; *k; k=&(*k)->next) + if (rte_better(*k, *bp)) + bp = k; + + /* And relink it */ + rte *best = *bp; + *bp = best->next; + best->next = net->routes; + net->routes = best; + } + } + else if (new) + { + /* The third case - the new route is not better than the old + best route (therefore old_best != NULL) and the old best + route was not removed (therefore old_best == net->routes). + We just link the new route after the old best route. */ + + ASSERT(net->routes != NULL); + new->next = net->routes->next; + net->routes->next = new; + } + /* The fourth (empty) case - suboptimal route was removed, nothing to do */ } - /* Log the route removal */ + if (new) + new->lastmod = now; + + /* Log the route change */ + if (new) + rte_trace_in(D_ROUTES, p, new, net->routes == new ? "added [best]" : "added"); + if (!new && (p->debug & D_ROUTES)) { if (old != old_best) @@ -666,9 +738,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str rte_trace_in(D_ROUTES, p, old, "removed [sole]"); } + /* Propagate the route change */ rte_announce(table, RA_ANY, net, new, old, NULL, tmpa); - rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, tmpa); - rte_announce(table, RA_ACCEPTED, net, new, old, before_old, tmpa); + if (net->routes != old_best) + rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, tmpa); + if (table->config->sorted) + rte_announce(table, RA_ACCEPTED, net, new, old, before_old, tmpa); if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && @@ -1336,6 +1411,8 @@ rt_commit(struct config *new, struct config *old) r->table = ot; ot->name = r->name; ot->config = r; + if (o->sorted != r->sorted) + log(L_WARN "Reconfiguration of rtable sorted flag not implemented"); } else { diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 4495c039..e5bc84dd 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1258,7 +1258,8 @@ same_group(rte *r, u32 lpref, u32 lasn) static inline int use_deterministic_med(rte *r) { - return ((struct bgp_proto *) r->attrs->proto)->cf->deterministic_med; + struct proto *P = r->attrs->proto; + return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med; } int diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 099a39a9..d59b4308 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -955,14 +955,14 @@ bgp_check_config(struct bgp_config *c) if (internal && c->rs_client) cf_error("Only external neighbor can be RS client"); - /* + if (c->multihop && (c->gw_mode == GW_DIRECT)) cf_error("Multihop BGP cannot use direct gateway mode"); if (c->multihop && (ipa_has_link_scope(c->remote_ip) || ipa_has_link_scope(c->source_addr))) cf_error("Multihop BGP cannot be used with link-local addresses"); - */ + /* Different default based on rs_client */ if (!c->missing_lladdr) @@ -970,8 +970,17 @@ bgp_check_config(struct bgp_config *c) /* Different default for gw_mode */ if (!c->gw_mode) - // c->gw_mode = (c->multihop || internal) ? GW_RECURSIVE : GW_DIRECT; - c->gw_mode = GW_DIRECT; + c->gw_mode = (c->multihop || internal) ? GW_RECURSIVE : GW_DIRECT; + + + if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted) + cf_error("BGP in recursive mode prohibits sorted table"); + + if (c->deterministic_med && c->c.table->sorted) + cf_error("BGP with deterministic MED prohibits sorted table"); + + if (c->secondary && !c->c.table->sorted) + cf_error("BGP with secondary option requires sorted table"); } static int diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 356415a2..f9a5be65 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -74,25 +74,19 @@ bgp_proto: | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; } -/* | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } -*/ | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; } | bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; } | bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; } | bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; } -/* | bgp_proto GATEWAY DIRECT ';' { BGP_CFG->gw_mode = GW_DIRECT; } | bgp_proto GATEWAY RECURSIVE ';' { BGP_CFG->gw_mode = GW_RECURSIVE; } -*/ | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } | bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; } | bgp_proto PREFER OLDER bool ';' { BGP_CFG->prefer_older = $4; } -/* | bgp_proto DETERMINISTIC MED bool ';' { BGP_CFG->deterministic_med = $4; } -*/ | bgp_proto DEFAULT BGP_MED expr ';' { BGP_CFG->default_med = $4; } | bgp_proto DEFAULT BGP_LOCAL_PREF expr ';' { BGP_CFG->default_local_pref = $4; } | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->source_addr = $4; }