BGP multipath support
Kernel option 'merge paths' allows merging routes exported to the kernel protocol (currently BGP and static routes) into multipath routes.
This commit is contained in:
parent
db027a41d4
commit
8d9eef1771
14 changed files with 368 additions and 36 deletions
|
@ -2227,6 +2227,18 @@ limitations can be overcome using another routing table and the pipe protocol.
|
||||||
a graceful restart recovery is active, the Kernel protocol will defer
|
a graceful restart recovery is active, the Kernel protocol will defer
|
||||||
synchronization of routing tables until the end of the recovery. Note
|
synchronization of routing tables until the end of the recovery. Note
|
||||||
that import of kernel routes to BIRD is not affected.
|
that import of kernel routes to BIRD is not affected.
|
||||||
|
|
||||||
|
<tag>merge paths <M>switch</M> [limit <M>number</M>]</tag>
|
||||||
|
Usually, only best routes are exported to the kernel protocol. With path
|
||||||
|
merging enabled, both best routes and equivalent non-best routes are
|
||||||
|
merged during export to generate one ECMP (equal-cost multipath) route
|
||||||
|
for each network. This is useful e.g. for BGP multipath. Note that best
|
||||||
|
routes are still pivotal for route export (responsible for most
|
||||||
|
properties of resulting ECMP routes), while exported non-best routes are
|
||||||
|
responsible just for additional multipath next hops. This option also
|
||||||
|
allows specifying a limit on the maximum number of next hops in one route. By
|
||||||
|
default, multipath merging is disabled. If enabled, default value of the
|
||||||
|
limit is 16.
|
||||||
</descrip>
|
</descrip>
|
||||||
|
|
||||||
<sect1>Attributes
|
<sect1>Attributes
|
||||||
|
|
|
@ -471,26 +471,22 @@ static inline void f_rte_cow(void)
|
||||||
static void
|
static void
|
||||||
f_rta_cow(void)
|
f_rta_cow(void)
|
||||||
{
|
{
|
||||||
if ((*f_rte)->attrs->aflags & RTAF_CACHED) {
|
if (!rta_is_cached((*f_rte)->attrs))
|
||||||
|
return;
|
||||||
|
|
||||||
/* Prepare to modify rte */
|
/* Prepare to modify rte */
|
||||||
f_rte_cow();
|
f_rte_cow();
|
||||||
|
|
||||||
/* Store old rta to free it later */
|
/* Store old rta to free it later, it stores reference from rte_cow() */
|
||||||
f_old_rta = (*f_rte)->attrs;
|
f_old_rta = (*f_rte)->attrs;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Alloc new rta, do shallow copy and update rte. Fields eattrs
|
* Get shallow copy of rta. Fields eattrs and nexthops of rta are shared
|
||||||
* and nexthops of rta are shared with f_old_rta (they will be
|
* with f_old_rta (they will be copied when the cached rta will be obtained
|
||||||
* copied when the cached rta will be obtained at the end of
|
* at the end of f_run()), also the lock of hostentry is inherited (we
|
||||||
* f_run()), also the lock of hostentry is inherited (we suppose
|
* suppose hostentry is not changed by filters).
|
||||||
* hostentry is not changed by filters).
|
*/
|
||||||
*/
|
(*f_rte)->attrs = rta_do_cow((*f_rte)->attrs, f_pool);
|
||||||
rta *ra = lp_alloc(f_pool, sizeof(rta));
|
|
||||||
memcpy(ra, f_old_rta, sizeof(rta));
|
|
||||||
ra->aflags = 0;
|
|
||||||
(*f_rte)->attrs = ra;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS;
|
static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS;
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ABS(a) ((a)>=0 ? (a) : -(a))
|
#define ABS(a) ((a)>=0 ? (a) : -(a))
|
||||||
|
/* Absolute difference of a and b (never subtracts the larger from the smaller,
   so it is usable with unsigned operands). Arguments are evaluated more than once. */
#define DELTA(a,b) (((a) >= (b)) ? ((a) - (b)) : ((b) - (a)))
|
||||||
#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a)))
|
#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a)))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -158,6 +158,7 @@ struct proto {
|
||||||
byte gr_wait; /* Route export to protocol is postponed until graceful restart */
|
byte gr_wait; /* Route export to protocol is postponed until graceful restart */
|
||||||
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
|
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
|
||||||
byte down_code; /* Reason for shutdown (PDC_* codes) */
|
byte down_code; /* Reason for shutdown (PDC_* codes) */
|
||||||
|
byte merge_limit; /* Maximal number of nexthops for RA_MERGED */
|
||||||
u32 hash_key; /* Random key used for hashing of neighbors */
|
u32 hash_key; /* Random key used for hashing of neighbors */
|
||||||
bird_clock_t last_state_change; /* Time of last state transition */
|
bird_clock_t last_state_change; /* Time of last state transition */
|
||||||
char *last_state_name_announced; /* Last state name we've announced to the user */
|
char *last_state_name_announced; /* Last state name we've announced to the user */
|
||||||
|
@ -200,6 +201,7 @@ struct proto {
|
||||||
* rte_recalculate Called at the beginning of the best route selection
|
* rte_recalculate Called at the beginning of the best route selection
|
||||||
* rte_better Compare two rte's and decide which one is better (1=first, 0=second).
|
* rte_better Compare two rte's and decide which one is better (1=first, 0=second).
|
||||||
* rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no).
|
* rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no).
|
||||||
|
* rte_mergable Compare two rte's and decide whether they could be merged (1=yes, 0=no).
|
||||||
* rte_insert Called whenever a rte is inserted to a routing table.
|
* rte_insert Called whenever a rte is inserted to a routing table.
|
||||||
* rte_remove Called whenever a rte is removed from the routing table.
|
* rte_remove Called whenever a rte is removed from the routing table.
|
||||||
*/
|
*/
|
||||||
|
@ -207,6 +209,7 @@ struct proto {
|
||||||
int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
|
int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
|
||||||
int (*rte_better)(struct rte *, struct rte *);
|
int (*rte_better)(struct rte *, struct rte *);
|
||||||
int (*rte_same)(struct rte *, struct rte *);
|
int (*rte_same)(struct rte *, struct rte *);
|
||||||
|
int (*rte_mergable)(struct rte *, struct rte *);
|
||||||
void (*rte_insert)(struct network *, struct rte *);
|
void (*rte_insert)(struct network *, struct rte *);
|
||||||
void (*rte_remove)(struct network *, struct rte *);
|
void (*rte_remove)(struct network *, struct rte *);
|
||||||
|
|
||||||
|
|
11
nest/route.h
11
nest/route.h
|
@ -240,6 +240,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED);
|
||||||
#define RA_OPTIMAL 1 /* Announcement of optimal route change */
|
#define RA_OPTIMAL 1 /* Announcement of optimal route change */
|
||||||
#define RA_ACCEPTED 2 /* Announcement of first accepted route */
|
#define RA_ACCEPTED 2 /* Announcement of first accepted route */
|
||||||
#define RA_ANY 3 /* Announcement of any route change */
|
#define RA_ANY 3 /* Announcement of any route change */
|
||||||
|
#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */
|
||||||
|
|
||||||
/* Return value of import_control() callback */
|
/* Return value of import_control() callback */
|
||||||
#define RIC_ACCEPT 1 /* Accepted by protocol */
|
#define RIC_ACCEPT 1 /* Accepted by protocol */
|
||||||
|
@ -263,12 +264,14 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s
|
||||||
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
|
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
|
||||||
void rte_discard(rtable *tab, rte *old);
|
void rte_discard(rtable *tab, rte *old);
|
||||||
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
|
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
|
||||||
|
rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, int silent);
|
||||||
void rt_refresh_begin(rtable *t, struct announce_hook *ah);
|
void rt_refresh_begin(rtable *t, struct announce_hook *ah);
|
||||||
void rt_refresh_end(rtable *t, struct announce_hook *ah);
|
void rt_refresh_end(rtable *t, struct announce_hook *ah);
|
||||||
void rte_dump(rte *);
|
void rte_dump(rte *);
|
||||||
void rte_free(rte *);
|
void rte_free(rte *);
|
||||||
rte *rte_do_cow(rte *);
|
rte *rte_do_cow(rte *);
|
||||||
static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; }
|
static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; }
|
||||||
|
rte *rte_cow_rta(rte *r, linpool *lp);
|
||||||
void rt_dump(rtable *);
|
void rt_dump(rtable *);
|
||||||
void rt_dump_all(void);
|
void rt_dump_all(void);
|
||||||
int rt_feed_baby(struct proto *p);
|
int rt_feed_baby(struct proto *p);
|
||||||
|
@ -388,6 +391,12 @@ typedef struct rta {
|
||||||
#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other
|
#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other
|
||||||
protocol-specific metric is available */
|
protocol-specific metric is available */
|
||||||
|
|
||||||
|
|
||||||
|
/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */
|
||||||
|
static inline int rte_is_reachable(rte *r)
|
||||||
|
{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); }
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Extended Route Attributes
|
* Extended Route Attributes
|
||||||
*/
|
*/
|
||||||
|
@ -490,6 +499,8 @@ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; }
|
||||||
static inline rta *rta_clone(rta *r) { r->uc++; return r; }
|
static inline rta *rta_clone(rta *r) { r->uc++; return r; }
|
||||||
void rta__free(rta *r);
|
void rta__free(rta *r);
|
||||||
static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
|
static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
|
||||||
|
rta *rta_do_cow(rta *o, linpool *lp);
|
||||||
|
/* Return @r itself when it is not cached, otherwise a private copy of it
   obtained from rta_do_cow() using linpool @lp */
static inline rta *
rta_cow(rta *r, linpool *lp)
{
  if (!rta_is_cached(r))
    return r;

  return rta_do_cow(r, lp);
}
|
||||||
void rta_dump(rta *);
|
void rta_dump(rta *);
|
||||||
void rta_dump_all(void);
|
void rta_dump_all(void);
|
||||||
void rta_show(struct cli *, rta *, ea_list *);
|
void rta_show(struct cli *, rta *, ea_list *);
|
||||||
|
|
|
@ -1138,6 +1138,16 @@ rta__free(rta *a)
|
||||||
sl_free(rta_slab, a);
|
sl_free(rta_slab, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rta *
|
||||||
|
rta_do_cow(rta *o, linpool *lp)
|
||||||
|
{
|
||||||
|
rta *r = lp_alloc(lp, sizeof(rta));
|
||||||
|
memcpy(r, o, sizeof(rta));
|
||||||
|
r->aflags = 0;
|
||||||
|
r->uc = 0;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rta_dump - dump route attributes
|
* rta_dump - dump route attributes
|
||||||
* @a: attribute structure to dump
|
* @a: attribute structure to dump
|
||||||
|
|
228
nest/rt-table.c
228
nest/rt-table.c
|
@ -144,6 +144,38 @@ rte_do_cow(rte *r)
|
||||||
return e;
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* rte_cow_rta - get a private writable copy of &rte with writable &rta
|
||||||
|
* @r: a route entry to be copied
|
||||||
|
* @lp: a linpool from which to allocate &rta
|
||||||
|
*
|
||||||
|
* rte_cow_rta() takes a &rte and prepares it and associated &rta for
|
||||||
|
* modification. There are three possibilities: First, both &rte and &rta are
|
||||||
|
* private copies, in that case they are returned unchanged. Second, &rte is
|
||||||
|
* private copy, but &rta is cached, in that case &rta is duplicated using
|
||||||
|
* rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
|
||||||
|
* both structures are duplicated by rte_do_cow() and rta_do_cow().
|
||||||
|
*
|
||||||
|
* Note that in the second case, cached &rta loses one reference, while private
|
||||||
|
* copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
|
||||||
|
* nexthops, ...) with it. To work properly, original shared &rta should have
|
||||||
|
* another reference during the life of created private copy.
|
||||||
|
*
|
||||||
|
* Result: a pointer to the new writable &rte with writable &rta.
|
||||||
|
*/
|
||||||
|
rte *
|
||||||
|
rte_cow_rta(rte *r, linpool *lp)
|
||||||
|
{
|
||||||
|
if (!rta_is_cached(r->attrs))
|
||||||
|
return r;
|
||||||
|
|
||||||
|
rte *e = rte_cow(r);
|
||||||
|
rta *a = rta_do_cow(r->attrs, lp);
|
||||||
|
rta_free(e->attrs);
|
||||||
|
e->attrs = a;
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
static int /* Actually better or at least as good as */
|
static int /* Actually better or at least as good as */
|
||||||
rte_better(rte *new, rte *old)
|
rte_better(rte *new, rte *old)
|
||||||
{
|
{
|
||||||
|
@ -172,6 +204,26 @@ rte_better(rte *new, rte *old)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
rte_mergable(rte *pri, rte *sec)
|
||||||
|
{
|
||||||
|
int (*mergable)(rte *, rte *);
|
||||||
|
|
||||||
|
if (!rte_is_valid(pri) || !rte_is_valid(sec))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (pri->pref != sec->pref)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (mergable = pri->attrs->src->proto->rte_mergable)
|
||||||
|
return mergable(pri, sec);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
rte_trace(struct proto *p, rte *e, int dir, char *msg)
|
rte_trace(struct proto *p, rte *e, int dir, char *msg)
|
||||||
{
|
{
|
||||||
|
@ -535,6 +587,129 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
|
||||||
rte_free(old_free);
|
rte_free(old_free);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static struct mpnh *
|
||||||
|
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
|
||||||
|
{
|
||||||
|
struct mpnh nh = { .gw = a->gw, .iface = a->iface };
|
||||||
|
struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
|
||||||
|
return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
rte *
|
||||||
|
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
|
||||||
|
{
|
||||||
|
// struct proto *p = ah->proto;
|
||||||
|
struct mpnh *nhs = NULL;
|
||||||
|
rte *best0, *best, *rt0, *rt, *tmp;
|
||||||
|
|
||||||
|
best0 = net->routes;
|
||||||
|
*rt_free = NULL;
|
||||||
|
|
||||||
|
if (!rte_is_valid(best0))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
best = export_filter(ah, best0, rt_free, tmpa, silent);
|
||||||
|
|
||||||
|
if (!best || !rte_is_reachable(best))
|
||||||
|
return best;
|
||||||
|
|
||||||
|
for (rt0 = best0->next; rt0; rt0 = rt0->next)
|
||||||
|
{
|
||||||
|
if (!rte_mergable(best0, rt0))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
rt = export_filter(ah, rt0, &tmp, NULL, 1);
|
||||||
|
|
||||||
|
if (!rt)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (rte_is_reachable(rt))
|
||||||
|
nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);
|
||||||
|
|
||||||
|
if (tmp)
|
||||||
|
rte_free(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nhs)
|
||||||
|
{
|
||||||
|
nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit);
|
||||||
|
|
||||||
|
if (nhs->next)
|
||||||
|
{
|
||||||
|
best = rte_cow_rta(best, rte_update_pool);
|
||||||
|
best->attrs->dest = RTD_MULTIPATH;
|
||||||
|
best->attrs->nexthops = nhs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (best != best0)
|
||||||
|
*rt_free = best;
|
||||||
|
|
||||||
|
return best;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
|
||||||
|
rte *new_best, rte*old_best, int refeed)
|
||||||
|
{
|
||||||
|
// struct proto *p = ah->proto;
|
||||||
|
|
||||||
|
rte *new_best_free = NULL;
|
||||||
|
rte *old_best_free = NULL;
|
||||||
|
rte *new_changed_free = NULL;
|
||||||
|
rte *old_changed_free = NULL;
|
||||||
|
ea_list *tmpa = NULL;
|
||||||
|
|
||||||
|
/* We assume that all rte arguments are either NULL or rte_is_valid() */
|
||||||
|
|
||||||
|
/* This check should be done by the caller */
|
||||||
|
if (!new_best && !old_best)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Check whether the change is relevant to the merged route */
|
||||||
|
if ((new_best == old_best) && !refeed)
|
||||||
|
{
|
||||||
|
new_changed = rte_mergable(new_best, new_changed) ?
|
||||||
|
export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;
|
||||||
|
|
||||||
|
old_changed = rte_mergable(old_best, old_changed) ?
|
||||||
|
export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;
|
||||||
|
|
||||||
|
if (!new_changed && !old_changed)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_best)
|
||||||
|
ah->stats->exp_updates_received++;
|
||||||
|
else
|
||||||
|
ah->stats->exp_withdraws_received++;
|
||||||
|
|
||||||
|
/* Prepare new merged route */
|
||||||
|
if (new_best)
|
||||||
|
new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);
|
||||||
|
|
||||||
|
/* Prepare old merged route (without proper merged next hops) */
|
||||||
|
/* There are some issues with running filter on old route - see rt_notify_basic() */
|
||||||
|
if (old_best && !refeed)
|
||||||
|
old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);
|
||||||
|
|
||||||
|
if (new_best || old_best)
|
||||||
|
do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);
|
||||||
|
|
||||||
|
/* Discard temporary rte's */
|
||||||
|
if (new_best_free)
|
||||||
|
rte_free(new_best_free);
|
||||||
|
if (old_best_free)
|
||||||
|
rte_free(old_best_free);
|
||||||
|
if (new_changed_free)
|
||||||
|
rte_free(new_changed_free);
|
||||||
|
if (old_changed_free)
|
||||||
|
rte_free(old_changed_free);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rte_announce - announce a routing table change
|
* rte_announce - announce a routing table change
|
||||||
* @tab: table the route has been added to
|
* @tab: table the route has been added to
|
||||||
|
@ -564,13 +739,20 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
|
||||||
* the protocol gets called.
|
* the protocol gets called.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old)
|
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
|
||||||
|
rte *new_best, rte *old_best, rte *before_old)
|
||||||
{
|
{
|
||||||
|
if (!rte_is_valid(new))
|
||||||
|
new = NULL;
|
||||||
|
|
||||||
if (!rte_is_valid(old))
|
if (!rte_is_valid(old))
|
||||||
old = before_old = NULL;
|
old = before_old = NULL;
|
||||||
|
|
||||||
if (!rte_is_valid(new))
|
if (!rte_is_valid(new_best))
|
||||||
new = NULL;
|
new_best = NULL;
|
||||||
|
|
||||||
|
if (!rte_is_valid(old_best))
|
||||||
|
old_best = NULL;
|
||||||
|
|
||||||
if (!old && !new)
|
if (!old && !new)
|
||||||
return;
|
return;
|
||||||
|
@ -593,6 +775,8 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
|
||||||
if (a->proto->accept_ra_types == type)
|
if (a->proto->accept_ra_types == type)
|
||||||
if (type == RA_ACCEPTED)
|
if (type == RA_ACCEPTED)
|
||||||
rt_notify_accepted(a, net, new, old, before_old, 0);
|
rt_notify_accepted(a, net, new, old, before_old, 0);
|
||||||
|
else if (type == RA_MERGED)
|
||||||
|
rt_notify_merged(a, net, new, old, new_best, old_best, 0);
|
||||||
else
|
else
|
||||||
rt_notify_basic(a, net, new, old, 0);
|
rt_notify_basic(a, net, new, old, 0);
|
||||||
}
|
}
|
||||||
|
@ -898,11 +1082,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Propagate the route change */
|
/* Propagate the route change */
|
||||||
rte_announce(table, RA_ANY, net, new, old, NULL);
|
rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
|
||||||
if (net->routes != old_best)
|
if (net->routes != old_best)
|
||||||
rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL);
|
rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
|
||||||
if (table->config->sorted)
|
if (table->config->sorted)
|
||||||
rte_announce(table, RA_ACCEPTED, net, new, old, before_old);
|
rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
|
||||||
|
rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
|
||||||
|
|
||||||
if (!net->routes &&
|
if (!net->routes &&
|
||||||
(table->gc_counter++ >= table->config->gc_max_ops) &&
|
(table->gc_counter++ >= table->config->gc_max_ops) &&
|
||||||
|
@ -1081,10 +1266,11 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
|
||||||
/* Independent call to rte_announce(), used from next hop
|
/* Independent call to rte_announce(), used from next hop
|
||||||
recalculation, outside of rte_update(). new must be non-NULL */
|
recalculation, outside of rte_update(). new must be non-NULL */
|
||||||
static inline void
|
static inline void
|
||||||
rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old)
|
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
|
||||||
|
rte *new_best, rte *old_best)
|
||||||
{
|
{
|
||||||
rte_update_lock();
|
rte_update_lock();
|
||||||
rte_announce(tab, type, n, new, old, NULL);
|
rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
|
||||||
rte_update_unlock();
|
rte_update_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1548,7 +1734,7 @@ rt_next_hop_update_net(rtable *tab, net *n)
|
||||||
new = rt_next_hop_update_rte(tab, e);
|
new = rt_next_hop_update_rte(tab, e);
|
||||||
*k = new;
|
*k = new;
|
||||||
|
|
||||||
rte_announce_i(tab, RA_ANY, n, new, e);
|
rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
|
||||||
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
|
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
|
||||||
|
|
||||||
/* Call a pre-comparison hook */
|
/* Call a pre-comparison hook */
|
||||||
|
@ -1588,10 +1774,13 @@ rt_next_hop_update_net(rtable *tab, net *n)
|
||||||
/* Announce the new best route */
|
/* Announce the new best route */
|
||||||
if (new != old_best)
|
if (new != old_best)
|
||||||
{
|
{
|
||||||
rte_announce_i(tab, RA_OPTIMAL, n, new, old_best);
|
rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
|
||||||
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
|
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* FIXME: Better announcement of merged routes */
|
||||||
|
rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
|
||||||
|
|
||||||
if (free_old_best)
|
if (free_old_best)
|
||||||
rte_free_quick(old_best);
|
rte_free_quick(old_best);
|
||||||
|
|
||||||
|
@ -1755,6 +1944,8 @@ do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
|
||||||
rte_update_lock();
|
rte_update_lock();
|
||||||
if (type == RA_ACCEPTED)
|
if (type == RA_ACCEPTED)
|
||||||
rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
|
rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
|
||||||
|
else if (type == RA_MERGED)
|
||||||
|
rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
|
||||||
else
|
else
|
||||||
rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
|
rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
|
||||||
rte_update_unlock();
|
rte_update_unlock();
|
||||||
|
@ -1802,7 +1993,8 @@ again:
|
||||||
/* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
|
/* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
|
||||||
|
|
||||||
if ((p->accept_ra_types == RA_OPTIMAL) ||
|
if ((p->accept_ra_types == RA_OPTIMAL) ||
|
||||||
(p->accept_ra_types == RA_ACCEPTED))
|
(p->accept_ra_types == RA_ACCEPTED) ||
|
||||||
|
(p->accept_ra_types == RA_MERGED))
|
||||||
if (rte_is_valid(e))
|
if (rte_is_valid(e))
|
||||||
{
|
{
|
||||||
if (p->export_state != ES_FEEDING)
|
if (p->export_state != ES_FEEDING)
|
||||||
|
@ -2267,12 +2459,22 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
|
||||||
rte_update_lock(); /* We use the update buffer for filtering */
|
rte_update_lock(); /* We use the update buffer for filtering */
|
||||||
tmpa = make_tmp_attrs(e, rte_update_pool);
|
tmpa = make_tmp_attrs(e, rte_update_pool);
|
||||||
|
|
||||||
if (d->export_mode)
|
/* Special case for merged export */
|
||||||
|
if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
|
||||||
|
{
|
||||||
|
rte *rt_free;
|
||||||
|
e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
|
||||||
|
pass = 1;
|
||||||
|
|
||||||
|
if (!e)
|
||||||
|
{ e = ee; goto skip; }
|
||||||
|
}
|
||||||
|
else if (d->export_mode)
|
||||||
{
|
{
|
||||||
struct proto *ep = d->export_protocol;
|
struct proto *ep = d->export_protocol;
|
||||||
int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
|
int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
|
||||||
|
|
||||||
if (ep->accept_ra_types == RA_OPTIMAL)
|
if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
|
||||||
pass = 1;
|
pass = 1;
|
||||||
|
|
||||||
if (ic < 0)
|
if (ic < 0)
|
||||||
|
|
|
@ -1312,6 +1312,82 @@ bgp_rte_better(rte *new, rte *old)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
bgp_rte_mergable(rte *pri, rte *sec)
|
||||||
|
{
|
||||||
|
struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
|
||||||
|
struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
|
||||||
|
eattr *x, *y;
|
||||||
|
u32 p, s;
|
||||||
|
|
||||||
|
/* Skip suppressed routes (see bgp_rte_recalculate()) */
|
||||||
|
if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* RFC 4271 9.1.2.1. Route resolvability test */
|
||||||
|
if (!rte_resolvable(sec))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Start with local preferences */
|
||||||
|
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
|
||||||
|
y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
|
||||||
|
p = x ? x->u.data : pri_bgp->cf->default_local_pref;
|
||||||
|
s = y ? y->u.data : sec_bgp->cf->default_local_pref;
|
||||||
|
if (p != s)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
|
||||||
|
if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
|
||||||
|
{
|
||||||
|
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
|
||||||
|
y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
|
||||||
|
p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
|
||||||
|
s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
|
||||||
|
|
||||||
|
if (p != s)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
|
||||||
|
// return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* RFC 4271 9.1.2.2. b) Use origins */
|
||||||
|
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
|
||||||
|
y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
|
||||||
|
p = x ? x->u.data : ORIGIN_INCOMPLETE;
|
||||||
|
s = y ? y->u.data : ORIGIN_INCOMPLETE;
|
||||||
|
if (p != s)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* RFC 4271 9.1.2.2. c) Compare MED's */
|
||||||
|
if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
|
||||||
|
(bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
|
||||||
|
{
|
||||||
|
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
|
||||||
|
y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
|
||||||
|
p = x ? x->u.data : pri_bgp->cf->default_med;
|
||||||
|
s = y ? y->u.data : sec_bgp->cf->default_med;
|
||||||
|
if (p != s)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* RFC 4271 9.1.2.2. d) Prefer external peers */
|
||||||
|
if (pri_bgp->is_internal != sec_bgp->is_internal)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
|
||||||
|
p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
|
||||||
|
s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
|
||||||
|
if (p != s)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Remaining criteria are ignored */
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
same_group(rte *r, u32 lpref, u32 lasn)
|
same_group(rte *r, u32 lpref, u32 lasn)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1243,6 +1243,7 @@ bgp_init(struct proto_config *C)
|
||||||
P->feed_begin = bgp_feed_begin;
|
P->feed_begin = bgp_feed_begin;
|
||||||
P->feed_end = bgp_feed_end;
|
P->feed_end = bgp_feed_end;
|
||||||
P->rte_better = bgp_rte_better;
|
P->rte_better = bgp_rte_better;
|
||||||
|
P->rte_mergable = bgp_rte_mergable;
|
||||||
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
|
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
|
||||||
|
|
||||||
p->cf = c;
|
p->cf = c;
|
||||||
|
|
|
@ -238,6 +238,7 @@ byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned att
|
||||||
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
|
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
|
||||||
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
|
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
|
||||||
int bgp_rte_better(struct rte *, struct rte *);
|
int bgp_rte_better(struct rte *, struct rte *);
|
||||||
|
int bgp_rte_mergable(rte *pri, rte *sec);
|
||||||
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
|
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
|
||||||
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
|
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
|
||||||
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
|
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
|
||||||
|
|
|
@ -352,6 +352,12 @@ static_if_notify(struct proto *p, unsigned flags, struct iface *i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
static_rte_mergable(rte *pri, rte *sec)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
static_init_config(struct static_config *c)
|
static_init_config(struct static_config *c)
|
||||||
{
|
{
|
||||||
|
@ -366,6 +372,7 @@ static_init(struct proto_config *c)
|
||||||
|
|
||||||
p->neigh_notify = static_neigh_notify;
|
p->neigh_notify = static_neigh_notify;
|
||||||
p->if_notify = static_if_notify;
|
p->if_notify = static_if_notify;
|
||||||
|
p->rte_mergable = static_rte_mergable;
|
||||||
|
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ CF_DEFINES
|
||||||
|
|
||||||
CF_DECLS
|
CF_DECLS
|
||||||
|
|
||||||
CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC)
|
CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS)
|
||||||
|
|
||||||
CF_GRAMMAR
|
CF_GRAMMAR
|
||||||
|
|
||||||
|
@ -47,6 +47,8 @@ kern_item:
|
||||||
}
|
}
|
||||||
| DEVICE ROUTES bool { THIS_KRT->devroutes = $3; }
|
| DEVICE ROUTES bool { THIS_KRT->devroutes = $3; }
|
||||||
| GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; }
|
| GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; }
|
||||||
|
| MERGE PATHS bool { THIS_KRT->merge_paths = $3 ? KRT_DEFAULT_ECMP_LIMIT : 0; }
|
||||||
|
| MERGE PATHS bool LIMIT expr { THIS_KRT->merge_paths = $3 ? $5 : 0; if (($5 <= 0) || ($5 > 255)) cf_error("Merge paths limit must be in range 1-255"); }
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Kernel interface protocol */
|
/* Kernel interface protocol */
|
||||||
|
|
|
@ -595,9 +595,13 @@ krt_flush_routes(struct krt_proto *p)
|
||||||
static struct rte *
|
static struct rte *
|
||||||
krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa)
|
krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa)
|
||||||
{
|
{
|
||||||
struct filter *filter = p->p.main_ahook->out_filter;
|
struct announce_hook *ah = p->p.main_ahook;
|
||||||
|
struct filter *filter = ah->out_filter;
|
||||||
rte *rt;
|
rte *rt;
|
||||||
|
|
||||||
|
if (p->p.accept_ra_types == RA_MERGED)
|
||||||
|
return rt_export_merged(ah, net, rt_free, tmpa, 1);
|
||||||
|
|
||||||
rt = net->routes;
|
rt = net->routes;
|
||||||
*rt_free = NULL;
|
*rt_free = NULL;
|
||||||
|
|
||||||
|
@ -1091,11 +1095,13 @@ krt_rte_same(rte *a, rte *b)
|
||||||
struct krt_config *krt_cf;
|
struct krt_config *krt_cf;
|
||||||
|
|
||||||
static struct proto *
|
static struct proto *
|
||||||
krt_init(struct proto_config *c)
|
krt_init(struct proto_config *C)
|
||||||
{
|
{
|
||||||
struct krt_proto *p = proto_new(c, sizeof(struct krt_proto));
|
struct krt_proto *p = proto_new(C, sizeof(struct krt_proto));
|
||||||
|
struct krt_config *c = (struct krt_config *) C;
|
||||||
|
|
||||||
p->p.accept_ra_types = RA_OPTIMAL;
|
p->p.accept_ra_types = c->merge_paths ? RA_MERGED : RA_OPTIMAL;
|
||||||
|
p->p.merge_limit = c->merge_paths;
|
||||||
p->p.import_control = krt_import_control;
|
p->p.import_control = krt_import_control;
|
||||||
p->p.rt_notify = krt_rt_notify;
|
p->p.rt_notify = krt_rt_notify;
|
||||||
p->p.if_notify = krt_if_notify;
|
p->p.if_notify = krt_if_notify;
|
||||||
|
@ -1161,7 +1167,8 @@ krt_reconfigure(struct proto *p, struct proto_config *new)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* persist, graceful restart need not be the same */
|
/* persist, graceful restart need not be the same */
|
||||||
return o->scan_time == n->scan_time && o->learn == n->learn && o->devroutes == n->devroutes;
|
return o->scan_time == n->scan_time && o->learn == n->learn &&
|
||||||
|
o->devroutes == n->devroutes && o->merge_paths == n->merge_paths;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
@ -26,6 +26,8 @@ struct kif_proto;
|
||||||
#define KRF_DELETE 3 /* Should be deleted */
|
#define KRF_DELETE 3 /* Should be deleted */
|
||||||
#define KRF_IGNORE 4 /* To be ignored */
|
#define KRF_IGNORE 4 /* To be ignored */
|
||||||
|
|
||||||
|
#define KRT_DEFAULT_ECMP_LIMIT 16
|
||||||
|
|
||||||
#define EA_KRT_SOURCE EA_CODE(EAP_KRT, 0)
|
#define EA_KRT_SOURCE EA_CODE(EAP_KRT, 0)
|
||||||
#define EA_KRT_METRIC EA_CODE(EAP_KRT, 1)
|
#define EA_KRT_METRIC EA_CODE(EAP_KRT, 1)
|
||||||
|
|
||||||
|
@ -47,6 +49,7 @@ struct krt_config {
|
||||||
int learn; /* Learn routes from other sources */
|
int learn; /* Learn routes from other sources */
|
||||||
int devroutes; /* Allow export of device routes */
|
int devroutes; /* Allow export of device routes */
|
||||||
int graceful_restart; /* Regard graceful restart recovery */
|
int graceful_restart; /* Regard graceful restart recovery */
|
||||||
|
int merge_paths; /* Exported routes are merged for ECMP */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct krt_proto {
|
struct krt_proto {
|
||||||
|
|
Loading…
Reference in a new issue