BGP multipath support

The kernel option 'merge paths' allows merging routes exported to the kernel
protocol (currently BGP and static routes) into multipath routes.
This commit is contained in:
Ondrej Zajicek 2015-06-08 02:20:43 +02:00
parent db027a41d4
commit 8d9eef1771
14 changed files with 368 additions and 36 deletions

View file

@ -2227,6 +2227,18 @@ limitations can be overcome using another routing table and the pipe protocol.
a graceful restart recovery is active, the Kernel protocol will defer a graceful restart recovery is active, the Kernel protocol will defer
synchronization of routing tables until the end of the recovery. Note synchronization of routing tables until the end of the recovery. Note
that import of kernel routes to BIRD is not affected. that import of kernel routes to BIRD is not affected.
<tag>merge paths <M>switch</M> [limit <M>number</M>]</tag>
Usually, only best routes are exported to the kernel protocol. With path
merging enabled, both best routes and equivalent non-best routes are
merged during export to generate one ECMP (equal-cost multipath) route
for each network. This is useful e.g. for BGP multipath. Note that best
routes are still pivotal for route export (responsible for most
properties of resulting ECMP routes), while exported non-best routes are
responsible just for additional multipath next hops. This option also
allows specifying a limit on the maximum number of next hops in one route.
By default, multipath merging is disabled. If enabled, the default value of
the limit is 16.
</descrip> </descrip>
<sect1>Attributes <sect1>Attributes

View file

@ -471,26 +471,22 @@ static inline void f_rte_cow(void)
static void static void
f_rta_cow(void) f_rta_cow(void)
{ {
if ((*f_rte)->attrs->aflags & RTAF_CACHED) { if (!rta_is_cached((*f_rte)->attrs))
return;
/* Prepare to modify rte */ /* Prepare to modify rte */
f_rte_cow(); f_rte_cow();
/* Store old rta to free it later */ /* Store old rta to free it later, it stores reference from rte_cow() */
f_old_rta = (*f_rte)->attrs; f_old_rta = (*f_rte)->attrs;
/* /*
* Alloc new rta, do shallow copy and update rte. Fields eattrs * Get shallow copy of rta. Fields eattrs and nexthops of rta are shared
* and nexthops of rta are shared with f_old_rta (they will be * with f_old_rta (they will be copied when the cached rta will be obtained
* copied when the cached rta will be obtained at the end of * at the end of f_run()), also the lock of hostentry is inherited (we
* f_run()), also the lock of hostentry is inherited (we suppose * suppose hostentry is not changed by filters).
* hostentry is not changed by filters).
*/ */
rta *ra = lp_alloc(f_pool, sizeof(rta)); (*f_rte)->attrs = rta_do_cow((*f_rte)->attrs, f_pool);
memcpy(ra, f_old_rta, sizeof(rta));
ra->aflags = 0;
(*f_rte)->attrs = ra;
}
} }
static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS; static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS;

View file

@ -31,6 +31,7 @@
#endif #endif
#define ABS(a) ((a)>=0 ? (a) : -(a)) #define ABS(a) ((a)>=0 ? (a) : -(a))
/* Absolute difference of a and b; note that both arguments are evaluated twice */
#define DELTA(a,b) (((a) >= (b)) ? ((a) - (b)) : ((b) - (a)))
#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a))) #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a)))

View file

@ -158,6 +158,7 @@ struct proto {
byte gr_wait; /* Route export to protocol is postponed until graceful restart */ byte gr_wait; /* Route export to protocol is postponed until graceful restart */
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */ byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
byte down_code; /* Reason for shutdown (PDC_* codes) */ byte down_code; /* Reason for shutdown (PDC_* codes) */
byte merge_limit; /* Maximal number of nexthops for RA_MERGED */
u32 hash_key; /* Random key used for hashing of neighbors */ u32 hash_key; /* Random key used for hashing of neighbors */
bird_clock_t last_state_change; /* Time of last state transition */ bird_clock_t last_state_change; /* Time of last state transition */
char *last_state_name_announced; /* Last state name we've announced to the user */ char *last_state_name_announced; /* Last state name we've announced to the user */
@ -200,6 +201,7 @@ struct proto {
* rte_recalculate Called at the beginning of the best route selection * rte_recalculate Called at the beginning of the best route selection
* rte_better Compare two rte's and decide which one is better (1=first, 0=second). * rte_better Compare two rte's and decide which one is better (1=first, 0=second).
* rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no). * rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no).
* rte_mergable Compare two rte's and decide whether they could be merged (1=yes, 0=no).
* rte_insert Called whenever a rte is inserted to a routing table. * rte_insert Called whenever a rte is inserted to a routing table.
* rte_remove Called whenever a rte is removed from the routing table. * rte_remove Called whenever a rte is removed from the routing table.
*/ */
@ -207,6 +209,7 @@ struct proto {
int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
int (*rte_better)(struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *);
int (*rte_same)(struct rte *, struct rte *); int (*rte_same)(struct rte *, struct rte *);
int (*rte_mergable)(struct rte *, struct rte *);
void (*rte_insert)(struct network *, struct rte *); void (*rte_insert)(struct network *, struct rte *);
void (*rte_remove)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *);

View file

@ -240,6 +240,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED);
#define RA_OPTIMAL 1 /* Announcement of optimal route change */ #define RA_OPTIMAL 1 /* Announcement of optimal route change */
#define RA_ACCEPTED 2 /* Announcement of first accepted route */ #define RA_ACCEPTED 2 /* Announcement of first accepted route */
#define RA_ANY 3 /* Announcement of any route change */ #define RA_ANY 3 /* Announcement of any route change */
#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */
/* Return value of import_control() callback */ /* Return value of import_control() callback */
#define RIC_ACCEPT 1 /* Accepted by protocol */ #define RIC_ACCEPT 1 /* Accepted by protocol */
@ -263,12 +264,14 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); } static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
void rte_discard(rtable *tab, rte *old); void rte_discard(rtable *tab, rte *old);
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter); int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, int silent);
void rt_refresh_begin(rtable *t, struct announce_hook *ah); void rt_refresh_begin(rtable *t, struct announce_hook *ah);
void rt_refresh_end(rtable *t, struct announce_hook *ah); void rt_refresh_end(rtable *t, struct announce_hook *ah);
void rte_dump(rte *); void rte_dump(rte *);
void rte_free(rte *); void rte_free(rte *);
rte *rte_do_cow(rte *); rte *rte_do_cow(rte *);
static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; } static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; }
rte *rte_cow_rta(rte *r, linpool *lp);
void rt_dump(rtable *); void rt_dump(rtable *);
void rt_dump_all(void); void rt_dump_all(void);
int rt_feed_baby(struct proto *p); int rt_feed_baby(struct proto *p);
@ -388,6 +391,12 @@ typedef struct rta {
#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other #define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other
protocol-specific metric is available */ protocol-specific metric is available */
/*
 * Route has a regular, reachable next hop: its destination type is one of
 * RTD_ROUTER, RTD_DEVICE or RTD_MULTIPATH (i.e. not RTD_UNREACHABLE and the
 * like). Returns 1 in that case, 0 otherwise.
 */
static inline int
rte_is_reachable(rte *r)
{
  switch (r->attrs->dest)
  {
  case RTD_ROUTER:
  case RTD_DEVICE:
  case RTD_MULTIPATH:
    return 1;

  default:
    return 0;
  }
}
/* /*
* Extended Route Attributes * Extended Route Attributes
*/ */
@ -490,6 +499,8 @@ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; }
static inline rta *rta_clone(rta *r) { r->uc++; return r; } static inline rta *rta_clone(rta *r) { r->uc++; return r; }
void rta__free(rta *r); void rta__free(rta *r);
static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
rta *rta_do_cow(rta *o, linpool *lp);
/* Return @r itself unless it is cached; a cached rta is duplicated by rta_do_cow() using @lp */
static inline rta *
rta_cow(rta *r, linpool *lp)
{
  if (!rta_is_cached(r))
    return r;

  return rta_do_cow(r, lp);
}
void rta_dump(rta *); void rta_dump(rta *);
void rta_dump_all(void); void rta_dump_all(void);
void rta_show(struct cli *, rta *, ea_list *); void rta_show(struct cli *, rta *, ea_list *);

View file

@ -1138,6 +1138,16 @@ rta__free(rta *a)
sl_free(rta_slab, a); sl_free(rta_slab, a);
} }
/**
 * rta_do_cow - create a private shallow copy of route attributes
 * @o: attribute structure to be copied
 * @lp: linpool the copy is allocated from
 *
 * The copy is a plain byte copy of @o, so every pointer member still refers
 * to the same data as in @o. The flags and the use count of the copy are
 * reset to zero, which also clears %RTAF_CACHED.
 *
 * Result: a pointer to the newly allocated &rta.
 */
rta *
rta_do_cow(rta *o, linpool *lp)
{
  rta *copy = lp_alloc(lp, sizeof(rta));

  memcpy(copy, o, sizeof(rta));
  copy->uc = 0;
  copy->aflags = 0;

  return copy;
}
/** /**
* rta_dump - dump route attributes * rta_dump - dump route attributes
* @a: attribute structure to dump * @a: attribute structure to dump

View file

@ -144,6 +144,38 @@ rte_do_cow(rte *r)
return e; return e;
} }
/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() prepares a &rte and its &rta for modification. Three cases
 * are distinguished. If the &rta is not cached, @r is returned unchanged.
 * If the &rte is already a private copy but its &rta is cached, only the
 * &rta is duplicated by rta_do_cow(). If the &rte is shared and its &rta is
 * cached, both structures are duplicated, by rte_do_cow() and rta_do_cow()
 * respectively.
 *
 * Note that in the second case the cached &rta loses one reference, while
 * the private copy made by rta_do_cow() is only a shallow copy sharing
 * indirect data (eattrs, nexthops, ...) with it. To work properly, the
 * original cached &rta has to keep another reference for the whole life of
 * the private copy.
 *
 * Result: a pointer to the writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  rte *wr;
  rta *priv;

  /* Nothing to do when the attributes are already private */
  if (!rta_is_cached(r->attrs))
    return r;

  wr = rte_cow(r);
  priv = rta_do_cow(r->attrs, lp);

  /* Drop the reference the writable rte held on the cached rta */
  rta_free(wr->attrs);
  wr->attrs = priv;

  return wr;
}
static int /* Actually better or at least as good as */ static int /* Actually better or at least as good as */
rte_better(rte *new, rte *old) rte_better(rte *new, rte *old)
{ {
@ -172,6 +204,26 @@ rte_better(rte *new, rte *old)
return 0; return 0;
} }
/*
 * rte_mergable - decide whether route @sec may be merged with route @pri
 *
 * Both routes have to be valid, have the same preference and their sources
 * have to compare equal in proto->proto. The final decision is left to the
 * rte_mergable hook of the protocol that originated @pri; when that
 * protocol provides no such hook, the routes are not mergable.
 *
 * Returns 1 when the routes could be merged, 0 otherwise.
 */
static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->pref != sec->pref)
    return 0;

  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
    return 0;

  /* Fetch the hook first, then test it, to avoid an assignment used as a condition */
  mergable = pri->attrs->src->proto->rte_mergable;
  if (!mergable)
    return 0;

  return mergable(pri, sec);
}
static void static void
rte_trace(struct proto *p, rte *e, int dir, char *msg) rte_trace(struct proto *p, rte *e, int dir, char *msg)
{ {
@ -535,6 +587,129 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
rte_free(old_free); rte_free(old_free);
} }
/*
 * Merge the next hop(s) described by @a into the list @nhs using
 * mpnh_merge(), passing @max as its limit and rte_update_pool as its
 * linpool. Attributes with dest RTD_MULTIPATH contribute their whole
 * nexthops list, any other attributes contribute one entry built from
 * their gw and iface. Returns whatever mpnh_merge() returns.
 */
static struct mpnh *
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
{
  /* Stack-allocated single-entry list; the remaining members are zeroed */
  struct mpnh single = { .gw = a->gw, .iface = a->iface };
  struct mpnh *src;

  if (a->dest == RTD_MULTIPATH)
    src = a->nexthops;
  else
    src = &single;

  return mpnh_merge(nhs, src, 1, 0, max, rte_update_pool);
}
/*
 * rt_export_merged - build the merged (multipath) route exported for a network
 * @ah: announce hook the route is exported through
 * @net: network whose routes are merged
 * @rt_free: set to a temporary &rte to be released by the caller with
 *   rte_free() (as rt_notify_merged() does), or to NULL
 * @tmpa: temporary attribute list, handed to export_filter() for the best route
 * @silent: handed to export_filter() for the best route
 *
 * The first route of @net is run through the export filter. When it is
 * rejected, NULL is returned; when it is accepted but not reachable, it is
 * returned as is. Otherwise every following route of @net that is mergable
 * with the first one, passes the export filter and is reachable contributes
 * its next hops, limited by merge_limit of the exporting protocol. If more
 * than one next hop results, the returned route is made writable and turned
 * into an RTD_MULTIPATH route carrying the merged list.
 *
 * Result: the route to export, or NULL.
 */
rte *
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
{
  struct mpnh *merged = NULL;
  rte *head = net->routes;
  rte *best, *cand;

  *rt_free = NULL;

  if (!rte_is_valid(head))
    return NULL;

  best = export_filter(ah, head, rt_free, tmpa, silent);

  if (!best || !rte_is_reachable(best))
    return best;

  /* Collect next hops of the non-best routes that may be merged */
  for (cand = head->next; cand; cand = cand->next)
  {
    rte *filtered, *tmp;

    if (!rte_mergable(head, cand))
      continue;

    filtered = export_filter(ah, cand, &tmp, NULL, 1);

    if (filtered)
    {
      if (rte_is_reachable(filtered))
	merged = mpnh_merge_rta(merged, filtered->attrs, ah->proto->merge_limit);

      if (tmp)
	rte_free(tmp);
    }
  }

  if (merged)
  {
    /* Add next hops of the best route itself */
    merged = mpnh_merge_rta(merged, best->attrs, ah->proto->merge_limit);

    /* Only a list with at least two entries makes a multipath route */
    if (merged->next)
    {
      best = rte_cow_rta(best, rte_update_pool);
      best->attrs->dest = RTD_MULTIPATH;
      best->attrs->nexthops = merged;
    }
  }

  /* Anything but the original table entry is a temporary copy */
  if (best != head)
    *rt_free = best;

  return best;
}
/*
 * rt_notify_merged - process a route change for a hook accepting RA_MERGED
 * @ah: announce hook to notify
 * @net: network the change belongs to
 * @new_changed, @old_changed: the changed route, new and old version
 * @new_best, @old_best: best route of @net after and before the change
 * @refeed: nonzero when called during (re)feeding
 *
 * When the best route stays the same (and this is not a refeed), the change
 * is only relevant if the changed route is mergable with the best one and
 * passes the export filter; otherwise nothing is announced. The new merged
 * route is built by rt_export_merged() and passed to do_rt_notify() together
 * with the filtered old best route.
 */
static void
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
		 rte *new_best, rte *old_best, int refeed)
{
  /* Temporary routes created by filtering, released before return */
  rte *tmp_new_best = NULL;
  rte *tmp_old_best = NULL;
  rte *tmp_new_changed = NULL;
  rte *tmp_old_changed = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if (!refeed && (new_best == old_best))
  {
    if (rte_mergable(new_best, new_changed))
      new_changed = export_filter(ah, new_changed, &tmp_new_changed, NULL, 1);
    else
      new_changed = NULL;

    if (rte_mergable(old_best, old_changed))
      old_changed = export_filter(ah, old_changed, &tmp_old_changed, NULL, 1);
    else
      old_changed = NULL;

    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
  {
    ah->stats->exp_updates_received++;

    /* Prepare new merged route */
    new_best = rt_export_merged(ah, net, &tmp_new_best, &tmpa, 0);
  }
  else
    ah->stats->exp_withdraws_received++;

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(ah, old_best, &tmp_old_best, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (tmp_new_best)
    rte_free(tmp_new_best);

  if (tmp_old_best)
    rte_free(tmp_old_best);

  if (tmp_new_changed)
    rte_free(tmp_new_changed);

  if (tmp_old_changed)
    rte_free(tmp_old_changed);
}
/** /**
* rte_announce - announce a routing table change * rte_announce - announce a routing table change
* @tab: table the route has been added to * @tab: table the route has been added to
@ -564,13 +739,20 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
* the protocol gets called. * the protocol gets called.
*/ */
static void static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old) rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
rte *new_best, rte *old_best, rte *before_old)
{ {
if (!rte_is_valid(new))
new = NULL;
if (!rte_is_valid(old)) if (!rte_is_valid(old))
old = before_old = NULL; old = before_old = NULL;
if (!rte_is_valid(new)) if (!rte_is_valid(new_best))
new = NULL; new_best = NULL;
if (!rte_is_valid(old_best))
old_best = NULL;
if (!old && !new) if (!old && !new)
return; return;
@ -593,6 +775,8 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
if (a->proto->accept_ra_types == type) if (a->proto->accept_ra_types == type)
if (type == RA_ACCEPTED) if (type == RA_ACCEPTED)
rt_notify_accepted(a, net, new, old, before_old, 0); rt_notify_accepted(a, net, new, old, before_old, 0);
else if (type == RA_MERGED)
rt_notify_merged(a, net, new, old, new_best, old_best, 0);
else else
rt_notify_basic(a, net, new, old, 0); rt_notify_basic(a, net, new, old, 0);
} }
@ -898,11 +1082,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr
} }
/* Propagate the route change */ /* Propagate the route change */
rte_announce(table, RA_ANY, net, new, old, NULL); rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
if (net->routes != old_best) if (net->routes != old_best)
rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL); rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
if (table->config->sorted) if (table->config->sorted)
rte_announce(table, RA_ACCEPTED, net, new, old, before_old); rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
if (!net->routes && if (!net->routes &&
(table->gc_counter++ >= table->config->gc_max_ops) && (table->gc_counter++ >= table->config->gc_max_ops) &&
@ -1081,10 +1266,11 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
/* Independent call to rte_announce(), used from next hop /* Independent call to rte_announce(), used from next hop
recalculation, outside of rte_update(). new must be non-NULL */ recalculation, outside of rte_update(). new must be non-NULL */
static inline void static inline void
rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old) rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
rte *new_best, rte *old_best)
{ {
rte_update_lock(); rte_update_lock();
rte_announce(tab, type, n, new, old, NULL); rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
rte_update_unlock(); rte_update_unlock();
} }
@ -1548,7 +1734,7 @@ rt_next_hop_update_net(rtable *tab, net *n)
new = rt_next_hop_update_rte(tab, e); new = rt_next_hop_update_rte(tab, e);
*k = new; *k = new;
rte_announce_i(tab, RA_ANY, n, new, e); rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated"); rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
/* Call a pre-comparison hook */ /* Call a pre-comparison hook */
@ -1588,10 +1774,13 @@ rt_next_hop_update_net(rtable *tab, net *n)
/* Announce the new best route */ /* Announce the new best route */
if (new != old_best) if (new != old_best)
{ {
rte_announce_i(tab, RA_OPTIMAL, n, new, old_best); rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]"); rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
} }
/* FIXME: Better announcement of merged routes */
rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
if (free_old_best) if (free_old_best)
rte_free_quick(old_best); rte_free_quick(old_best);
@ -1755,6 +1944,8 @@ do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
rte_update_lock(); rte_update_lock();
if (type == RA_ACCEPTED) if (type == RA_ACCEPTED)
rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1); rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
else if (type == RA_MERGED)
rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
else else
rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding); rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
rte_update_unlock(); rte_update_unlock();
@ -1802,7 +1993,8 @@ again:
/* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */ /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
if ((p->accept_ra_types == RA_OPTIMAL) || if ((p->accept_ra_types == RA_OPTIMAL) ||
(p->accept_ra_types == RA_ACCEPTED)) (p->accept_ra_types == RA_ACCEPTED) ||
(p->accept_ra_types == RA_MERGED))
if (rte_is_valid(e)) if (rte_is_valid(e))
{ {
if (p->export_state != ES_FEEDING) if (p->export_state != ES_FEEDING)
@ -2267,12 +2459,22 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
rte_update_lock(); /* We use the update buffer for filtering */ rte_update_lock(); /* We use the update buffer for filtering */
tmpa = make_tmp_attrs(e, rte_update_pool); tmpa = make_tmp_attrs(e, rte_update_pool);
if (d->export_mode) /* Special case for merged export */
if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
{
rte *rt_free;
e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
pass = 1;
if (!e)
{ e = ee; goto skip; }
}
else if (d->export_mode)
{ {
struct proto *ep = d->export_protocol; struct proto *ep = d->export_protocol;
int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0; int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
if (ep->accept_ra_types == RA_OPTIMAL) if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
pass = 1; pass = 1;
if (ic < 0) if (ic < 0)

View file

@ -1312,6 +1312,82 @@ bgp_rte_better(rte *new, rte *old)
} }
/*
 * bgp_rte_mergable - rte_mergable hook of BGP
 * @pri: the primary route
 * @sec: the route considered for merging with @pri
 *
 * The routes are mergable when @sec is resolvable, both routes have the
 * same suppressed flag and they tie in the compared selection steps: local
 * preference, AS path length (if enabled for either protocol), origin, MED
 * (if enabled for either protocol or the routes share a neighbor),
 * internal/external peer and IGP metric. Later tie-breakers are ignored.
 *
 * Returns 1 when the routes could be merged, 0 otherwise.
 */
int
bgp_rte_mergable(rte *pri, rte *sec)
{
  struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
  struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
  eattr *pri_ea, *sec_ea;
  u32 pri_val, sec_val;

  /* Skip suppressed routes (see bgp_rte_recalculate()) */
  if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
    return 0;

  /* RFC 4271 9.1.2.1. Route resolvability test */
  if (!rte_resolvable(sec))
    return 0;

  /* Start with local preferences */
  pri_ea = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
  sec_ea = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
  pri_val = pri_ea ? pri_ea->u.data : pri_bgp->cf->default_local_pref;
  sec_val = sec_ea ? sec_ea->u.data : sec_bgp->cf->default_local_pref;
  if (pri_val != sec_val)
    return 0;

  /* RFC 4271 9.1.2.2. a) Use AS path lengths */
  if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
  {
    pri_ea = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
    sec_ea = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
    pri_val = pri_ea ? as_path_getlen(pri_ea->u.ptr) : AS_PATH_MAXLEN;
    sec_val = sec_ea ? as_path_getlen(sec_ea->u.ptr) : AS_PATH_MAXLEN;
    if (pri_val != sec_val)
      return 0;
  }

  /* RFC 4271 9.1.2.2. b) Use origins */
  pri_ea = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
  sec_ea = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
  pri_val = pri_ea ? pri_ea->u.data : ORIGIN_INCOMPLETE;
  sec_val = sec_ea ? sec_ea->u.data : ORIGIN_INCOMPLETE;
  if (pri_val != sec_val)
    return 0;

  /* RFC 4271 9.1.2.2. c) Compare MED's */
  if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
      (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
  {
    pri_ea = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
    sec_ea = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
    pri_val = pri_ea ? pri_ea->u.data : pri_bgp->cf->default_med;
    sec_val = sec_ea ? sec_ea->u.data : sec_bgp->cf->default_med;
    if (pri_val != sec_val)
      return 0;
  }

  /* RFC 4271 9.1.2.2. d) Prefer external peers */
  if (pri_bgp->is_internal != sec_bgp->is_internal)
    return 0;

  /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
  pri_val = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
  sec_val = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
  if (pri_val != sec_val)
    return 0;

  /* Remaining criteria are ignored */
  return 1;
}
static inline int static inline int
same_group(rte *r, u32 lpref, u32 lasn) same_group(rte *r, u32 lpref, u32 lasn)
{ {

View file

@ -1243,6 +1243,7 @@ bgp_init(struct proto_config *C)
P->feed_begin = bgp_feed_begin; P->feed_begin = bgp_feed_begin;
P->feed_end = bgp_feed_end; P->feed_end = bgp_feed_end;
P->rte_better = bgp_rte_better; P->rte_better = bgp_rte_better;
P->rte_mergable = bgp_rte_mergable;
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL; P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
p->cf = c; p->cf = c;

View file

@ -238,6 +238,7 @@ byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned att
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory); struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
int bgp_get_attr(struct eattr *e, byte *buf, int buflen); int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_better(struct rte *, struct rte *);
int bgp_rte_mergable(rte *pri, rte *sec);
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs); void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);

View file

@ -352,6 +352,12 @@ static_if_notify(struct proto *p, unsigned flags, struct iface *i)
} }
} }
/*
 * static_rte_mergable - rte_mergable hook of the static protocol
 *
 * Any two routes handed to this hook are reported as mergable; the
 * arguments are not examined. The hook is installed by static_init() only,
 * hence the internal linkage.
 */
static int
static_rte_mergable(rte *pri, rte *sec)
{
  /* Parameters are required by the hook signature but intentionally unused */
  (void) pri;
  (void) sec;

  return 1;
}
void void
static_init_config(struct static_config *c) static_init_config(struct static_config *c)
{ {
@ -366,6 +372,7 @@ static_init(struct proto_config *c)
p->neigh_notify = static_neigh_notify; p->neigh_notify = static_neigh_notify;
p->if_notify = static_if_notify; p->if_notify = static_if_notify;
p->rte_mergable = static_rte_mergable;
return p; return p;
} }

View file

@ -17,7 +17,7 @@ CF_DEFINES
CF_DECLS CF_DECLS
CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC) CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS)
CF_GRAMMAR CF_GRAMMAR
@ -47,6 +47,8 @@ kern_item:
} }
| DEVICE ROUTES bool { THIS_KRT->devroutes = $3; } | DEVICE ROUTES bool { THIS_KRT->devroutes = $3; }
| GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; } | GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; }
| MERGE PATHS bool { THIS_KRT->merge_paths = $3 ? KRT_DEFAULT_ECMP_LIMIT : 0; }
| MERGE PATHS bool LIMIT expr { THIS_KRT->merge_paths = $3 ? $5 : 0; if (($5 <= 0) || ($5 > 255)) cf_error("Merge paths limit must be in range 1-255"); }
; ;
/* Kernel interface protocol */ /* Kernel interface protocol */

View file

@ -595,9 +595,13 @@ krt_flush_routes(struct krt_proto *p)
static struct rte * static struct rte *
krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa) krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa)
{ {
struct filter *filter = p->p.main_ahook->out_filter; struct announce_hook *ah = p->p.main_ahook;
struct filter *filter = ah->out_filter;
rte *rt; rte *rt;
if (p->p.accept_ra_types == RA_MERGED)
return rt_export_merged(ah, net, rt_free, tmpa, 1);
rt = net->routes; rt = net->routes;
*rt_free = NULL; *rt_free = NULL;
@ -1091,11 +1095,13 @@ krt_rte_same(rte *a, rte *b)
struct krt_config *krt_cf; struct krt_config *krt_cf;
static struct proto * static struct proto *
krt_init(struct proto_config *c) krt_init(struct proto_config *C)
{ {
struct krt_proto *p = proto_new(c, sizeof(struct krt_proto)); struct krt_proto *p = proto_new(C, sizeof(struct krt_proto));
struct krt_config *c = (struct krt_config *) C;
p->p.accept_ra_types = RA_OPTIMAL; p->p.accept_ra_types = c->merge_paths ? RA_MERGED : RA_OPTIMAL;
p->p.merge_limit = c->merge_paths;
p->p.import_control = krt_import_control; p->p.import_control = krt_import_control;
p->p.rt_notify = krt_rt_notify; p->p.rt_notify = krt_rt_notify;
p->p.if_notify = krt_if_notify; p->p.if_notify = krt_if_notify;
@ -1161,7 +1167,8 @@ krt_reconfigure(struct proto *p, struct proto_config *new)
return 0; return 0;
/* persist, graceful restart need not be the same */ /* persist, graceful restart need not be the same */
return o->scan_time == n->scan_time && o->learn == n->learn && o->devroutes == n->devroutes; return o->scan_time == n->scan_time && o->learn == n->learn &&
o->devroutes == n->devroutes && o->merge_paths == n->merge_paths;
} }
static void static void

View file

@ -26,6 +26,8 @@ struct kif_proto;
#define KRF_DELETE 3 /* Should be deleted */ #define KRF_DELETE 3 /* Should be deleted */
#define KRF_IGNORE 4 /* To be ignored */ #define KRF_IGNORE 4 /* To be ignored */
#define KRT_DEFAULT_ECMP_LIMIT 16
#define EA_KRT_SOURCE EA_CODE(EAP_KRT, 0) #define EA_KRT_SOURCE EA_CODE(EAP_KRT, 0)
#define EA_KRT_METRIC EA_CODE(EAP_KRT, 1) #define EA_KRT_METRIC EA_CODE(EAP_KRT, 1)
@ -47,6 +49,7 @@ struct krt_config {
int learn; /* Learn routes from other sources */ int learn; /* Learn routes from other sources */
int devroutes; /* Allow export of device routes */ int devroutes; /* Allow export of device routes */
int graceful_restart; /* Regard graceful restart recovery */ int graceful_restart; /* Regard graceful restart recovery */
int merge_paths; /* Exported routes are merged for ECMP */
}; };
struct krt_proto { struct krt_proto {