diff --git a/doc/bird.sgml b/doc/bird.sgml
index 1c2dda4b..752465b9 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -2227,6 +2227,18 @@ limitations can be overcome using another routing table and the pipe protocol.
a graceful restart recovery is active, the Kernel protocol will defer
synchronization of routing tables until the end of the recovery. Note
that import of kernel routes to BIRD is not affected.
+
+ merge paths switch [limit number]
+ Usually, only best routes are exported to the kernel protocol. With path
+ merging enabled, both best routes and equivalent non-best routes are
+ merged during export to generate one ECMP (equal-cost multipath) route
+ for each network. This is useful e.g. for BGP multipath. Note that best
+ routes are still pivotal for route export (responsible for most
+ properties of resulting ECMP routes), while exported non-best routes are
+ responsible just for additional multipath next hops. This option also
+ allows one to specify a limit on the maximum number of next hops in one
+ route (1-255). By default, multipath merging is disabled. If enabled, the
+ default value of the limit is 16.
Attributes
diff --git a/filter/filter.c b/filter/filter.c
index 3b14fc0c..3f8968aa 100644
--- a/filter/filter.c
+++ b/filter/filter.c
@@ -471,26 +471,22 @@ static inline void f_rte_cow(void)
static void
f_rta_cow(void)
{
- if ((*f_rte)->attrs->aflags & RTAF_CACHED) {
+ if (!rta_is_cached((*f_rte)->attrs))
+ return; /* rta is not cached, so nothing is copied */
- /* Prepare to modify rte */
- f_rte_cow();
+ /* Prepare to modify rte */
+ f_rte_cow();
- /* Store old rta to free it later */
- f_old_rta = (*f_rte)->attrs;
+ /* Store old rta to free it later; it holds the reference from rte_cow() */
+ f_old_rta = (*f_rte)->attrs;
- /*
- * Alloc new rta, do shallow copy and update rte. Fields eattrs
- * and nexthops of rta are shared with f_old_rta (they will be
- * copied when the cached rta will be obtained at the end of
- * f_run()), also the lock of hostentry is inherited (we suppose
- * hostentry is not changed by filters).
- */
- rta *ra = lp_alloc(f_pool, sizeof(rta));
- memcpy(ra, f_old_rta, sizeof(rta));
- ra->aflags = 0;
- (*f_rte)->attrs = ra;
- }
+ /*
+ * Get a shallow copy of rta. Fields eattrs and nexthops of rta are shared
+ * with f_old_rta (they will be copied when the cached rta is obtained at
+ * the end of f_run()); the lock of hostentry is also inherited (we
+ * suppose hostentry is not changed by filters).
+ */
+ (*f_rte)->attrs = rta_do_cow((*f_rte)->attrs, f_pool); /* copy is allocated from f_pool */
}
static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS;
diff --git a/lib/birdlib.h b/lib/birdlib.h
index 94054769..ad41dca3 100644
--- a/lib/birdlib.h
+++ b/lib/birdlib.h
@@ -31,6 +31,7 @@
#endif
#define ABS(a) ((a)>=0 ? (a) : -(a))
+#define DELTA(a,b) (((a)>=(b))?(a)-(b):(b)-(a)) /* Absolute difference; each argument is evaluated twice, avoid side effects */
#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a)))
diff --git a/nest/protocol.h b/nest/protocol.h
index a51e9afd..8c49154f 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -158,6 +158,7 @@ struct proto {
byte gr_wait; /* Route export to protocol is postponed until graceful restart */
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
byte down_code; /* Reason for shutdown (PDC_* codes) */
+ byte merge_limit; /* Maximal number of nexthops for RA_MERGED */
u32 hash_key; /* Random key used for hashing of neighbors */
bird_clock_t last_state_change; /* Time of last state transition */
char *last_state_name_announced; /* Last state name we've announced to the user */
@@ -200,6 +201,7 @@ struct proto {
* rte_recalculate Called at the beginning of the best route selection
* rte_better Compare two rte's and decide which one is better (1=first, 0=second).
* rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no).
+ * rte_mergable Compare two rte's and decide whether they could be merged (1=yes, 0=no).
* rte_insert Called whenever a rte is inserted to a routing table.
* rte_remove Called whenever a rte is removed from the routing table.
*/
@@ -207,6 +209,7 @@ struct proto {
int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
int (*rte_better)(struct rte *, struct rte *);
int (*rte_same)(struct rte *, struct rte *);
+ int (*rte_mergable)(struct rte *, struct rte *);
void (*rte_insert)(struct network *, struct rte *);
void (*rte_remove)(struct network *, struct rte *);
diff --git a/nest/route.h b/nest/route.h
index e22f950b..6067526d 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -240,6 +240,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED);
#define RA_OPTIMAL 1 /* Announcement of optimal route change */
#define RA_ACCEPTED 2 /* Announcement of first accepted route */
#define RA_ANY 3 /* Announcement of any route change */
+#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */
/* Return value of import_control() callback */
#define RIC_ACCEPT 1 /* Accepted by protocol */
@@ -263,12 +264,14 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
void rte_discard(rtable *tab, rte *old);
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
+rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, int silent);
void rt_refresh_begin(rtable *t, struct announce_hook *ah);
void rt_refresh_end(rtable *t, struct announce_hook *ah);
void rte_dump(rte *);
void rte_free(rte *);
rte *rte_do_cow(rte *);
static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; }
+rte *rte_cow_rta(rte *r, linpool *lp);
void rt_dump(rtable *);
void rt_dump_all(void);
int rt_feed_baby(struct proto *p);
@@ -388,6 +391,12 @@ typedef struct rta {
#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other
protocol-specific metric is availabe */
+
+/* Route has a regular, reachable next hop: dest is RTD_ROUTER, RTD_DEVICE or RTD_MULTIPATH (not RTD_UNREACHABLE and the like) */
+static inline int rte_is_reachable(rte *r)
+{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); }
+
+
/*
* Extended Route Attributes
*/
@@ -490,6 +499,8 @@ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; }
static inline rta *rta_clone(rta *r) { r->uc++; return r; }
void rta__free(rta *r);
static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
+rta *rta_do_cow(rta *o, linpool *lp); /* Unconditional copy of @o allocated from @lp */
+static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } /* Copies only a cached rta; otherwise returns @r itself */
void rta_dump(rta *);
void rta_dump_all(void);
void rta_show(struct cli *, rta *, ea_list *);
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index 32090b52..7fa05d6d 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -1138,6 +1138,16 @@ rta__free(rta *a)
sl_free(rta_slab, a);
}
+rta * /* Returns an uncached shallow copy of @o allocated from linpool @lp */
+rta_do_cow(rta *o, linpool *lp)
+{
+ rta *r = lp_alloc(lp, sizeof(rta));
+ memcpy(r, o, sizeof(rta)); /* shallow: pointer members still refer to the data of @o */
+ r->aflags = 0; /* clears all flags including RTAF_CACHED, so rta_is_cached(r) is false */
+ r->uc = 0; /* the copy starts with a zero use count */
+ return r;
+}
+
/**
* rta_dump - dump route attributes
* @a: attribute structure to dump
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 22e1c489..9e2c4e0d 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -144,6 +144,38 @@ rte_do_cow(rte *r)
return e;
}
+/**
+ * rte_cow_rta - get a private writable copy of &rte with writable &rta
+ * @r: a route entry to be copied
+ * @lp: a linpool from which to allocate &rta
+ *
+ * rte_cow_rta() takes a &rte and prepares it and the associated &rta for
+ * modification. There are three possibilities: First, both &rte and &rta are
+ * private copies; in that case they are returned unchanged. Second, &rte is a
+ * private copy but &rta is cached; in that case &rta is duplicated using
+ * rta_do_cow(). Third, &rte is shared and &rta is cached; in that case both
+ * structures are duplicated by rte_do_cow() and rta_do_cow().
+ *
+ * Note that in the second case, the cached &rta loses one reference, while the
+ * private copy created by rta_do_cow() is a shallow copy sharing indirect data
+ * (eattrs, nexthops, ...) with it. To work properly, the original shared &rta
+ * should have another reference during the life of the created private copy.
+ *
+ * Result: a pointer to the new writable &rte with writable &rta.
+ */
+rte *
+rte_cow_rta(rte *r, linpool *lp)
+{
+ if (!rta_is_cached(r->attrs))
+ return r; /* NOTE(review): rte is not copied on this path even if REF_COW is set; assumes a shared rte always has a cached rta - confirm */
+
+ rte *e = rte_cow(r); /* e == r unless r has REF_COW set */
+ rta *a = rta_do_cow(r->attrs, lp);
+ rta_free(e->attrs); /* drop the reference e held on the cached rta */
+ e->attrs = a;
+ return e;
+}
+
static int /* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
@@ -172,6 +204,26 @@ rte_better(rte *new, rte *old)
return 0;
}
+static int /* Could @sec be merged with @pri? (1=yes, 0=no) */
+rte_mergable(rte *pri, rte *sec)
+{
+ int (*mergable)(rte *, rte *);
+
+ if (!rte_is_valid(pri) || !rte_is_valid(sec))
+ return 0;
+
+ if (pri->pref != sec->pref) /* preferences must be equal */
+ return 0;
+
+ if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto) /* presumably compares the protocol type, not the instance - confirm */
+ return 0;
+
+ if (mergable = pri->attrs->src->proto->rte_mergable) /* intentional assignment: use the hook of @pri's protocol if it is set */
+ return mergable(pri, sec);
+
+ return 0; /* no rte_mergable hook: routes are never merged */
+}
+
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
@@ -535,6 +587,129 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
rte_free(old_free);
}
+
+static struct mpnh * /* Merge the next hop(s) of @a into list @nhs; @max is passed on to mpnh_merge() */
+mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
+{
+ struct mpnh nh = { .gw = a->gw, .iface = a->iface }; /* temporary single next hop built from @a */
+ struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh; /* multipath rta brings its own list */
+ return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool); /* NOTE(review): nh2 may point to stack-local nh; assumes mpnh_merge() copies it into rte_update_pool - confirm */
+}
+
+rte * /* Build the merged export route for @net; returns NULL if there is nothing to export */
+rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
+{
+ // struct proto *p = ah->proto;
+ struct mpnh *nhs = NULL;
+ rte *best0, *best, *rt0, *rt, *tmp;
+
+ best0 = net->routes; /* head of the route list of @net */
+ *rt_free = NULL;
+
+ if (!rte_is_valid(best0))
+ return NULL;
+
+ best = export_filter(ah, best0, rt_free, tmpa, silent);
+
+ if (!best || !rte_is_reachable(best)) /* no route, or no regular next hop: nothing to merge */
+ return best;
+
+ for (rt0 = best0->next; rt0; rt0 = rt0->next) /* walk the remaining routes of @net */
+ {
+ if (!rte_mergable(best0, rt0))
+ continue;
+
+ rt = export_filter(ah, rt0, &tmp, NULL, 1); /* same call as for best0, last argument (silent) fixed to 1 */
+
+ if (!rt)
+ continue;
+
+ if (rte_is_reachable(rt)) /* only next hops are taken from the non-best routes */
+ nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);
+
+ if (tmp)
+ rte_free(tmp);
+ }
+
+ if (nhs) /* at least one non-best route contributed next hops */
+ {
+ nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit); /* add the next hop(s) of the best route */
+
+ if (nhs->next) /* the merged list has more than one entry */
+ {
+ best = rte_cow_rta(best, rte_update_pool); /* get writable rte and rta before changing attributes */
+ best->attrs->dest = RTD_MULTIPATH;
+ best->attrs->nexthops = nhs;
+ }
+ }
+
+ if (best != best0) /* result is not the table's own rte, hand it over via @rt_free */
+ *rt_free = best;
+
+ return best;
+}
+
+
+static void /* Handle a route change for a hook using RA_MERGED and call do_rt_notify() if relevant */
+rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
+ rte *new_best, rte*old_best, int refeed)
+{
+ // struct proto *p = ah->proto;
+
+ rte *new_best_free = NULL;
+ rte *old_best_free = NULL;
+ rte *new_changed_free = NULL;
+ rte *old_changed_free = NULL;
+ ea_list *tmpa = NULL;
+
+ /* We assume that all rte arguments are either NULL or rte_is_valid() */
+
+ /* This check should be done by the caller */
+ if (!new_best && !old_best)
+ return;
+
+ /* Check whether the change is relevant to the merged route */
+ if ((new_best == old_best) && !refeed)
+ {
+ new_changed = rte_mergable(new_best, new_changed) ?
+ export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;
+
+ old_changed = rte_mergable(old_best, old_changed) ?
+ export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;
+
+ if (!new_changed && !old_changed) /* the filtered results are used only for this test */
+ return;
+ }
+
+ if (new_best)
+ ah->stats->exp_updates_received++;
+ else
+ ah->stats->exp_withdraws_received++;
+
+ /* Prepare new merged route */
+ if (new_best)
+ new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);
+
+ /* Prepare old merged route (without proper merged next hops) */
+ /* There are some issues with running filter on old route - see rt_notify_basic() */
+ if (old_best && !refeed)
+ old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);
+
+ if (new_best || old_best) /* either may have become NULL in the steps above */
+ do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);
+
+ /* Discard temporary rte's */
+ if (new_best_free)
+ rte_free(new_best_free);
+ if (old_best_free)
+ rte_free(old_best_free);
+ if (new_changed_free)
+ rte_free(new_changed_free);
+ if (old_changed_free)
+ rte_free(old_changed_free);
+}
+
+
/**
* rte_announce - announce a routing table change
* @tab: table the route has been added to
@@ -564,13 +739,20 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
* the protocol gets called.
*/
static void
-rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old)
+rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
+ rte *new_best, rte *old_best, rte *before_old)
{
+ if (!rte_is_valid(new))
+ new = NULL;
+
if (!rte_is_valid(old))
old = before_old = NULL;
- if (!rte_is_valid(new))
- new = NULL;
+ if (!rte_is_valid(new_best))
+ new_best = NULL;
+
+ if (!rte_is_valid(old_best))
+ old_best = NULL;
if (!old && !new)
return;
@@ -593,6 +775,8 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
if (a->proto->accept_ra_types == type)
if (type == RA_ACCEPTED)
rt_notify_accepted(a, net, new, old, before_old, 0);
+ else if (type == RA_MERGED)
+ rt_notify_merged(a, net, new, old, new_best, old_best, 0);
else
rt_notify_basic(a, net, new, old, 0);
}
@@ -898,11 +1082,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr
}
/* Propagate the route change */
- rte_announce(table, RA_ANY, net, new, old, NULL);
+ rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
if (net->routes != old_best)
- rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL);
+ rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
if (table->config->sorted)
- rte_announce(table, RA_ACCEPTED, net, new, old, before_old);
+ rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
+ rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
if (!net->routes &&
(table->gc_counter++ >= table->config->gc_max_ops) &&
@@ -1081,10 +1266,11 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
/* Independent call to rte_announce(), used from next hop
recalculation, outside of rte_update(). new must be non-NULL */
static inline void
-rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old)
+rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
+ rte *new_best, rte *old_best) /* both are forwarded unchanged to rte_announce() */
{
rte_update_lock();
- rte_announce(tab, type, n, new, old, NULL);
+ rte_announce(tab, type, net, new, old, new_best, old_best, NULL); /* before_old is always NULL here */
rte_update_unlock();
}
@@ -1548,7 +1734,7 @@ rt_next_hop_update_net(rtable *tab, net *n)
new = rt_next_hop_update_rte(tab, e);
*k = new;
- rte_announce_i(tab, RA_ANY, n, new, e);
+ rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
/* Call a pre-comparison hook */
@@ -1588,10 +1774,13 @@ rt_next_hop_update_net(rtable *tab, net *n)
/* Announce the new best route */
if (new != old_best)
{
- rte_announce_i(tab, RA_OPTIMAL, n, new, old_best);
+ rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
}
+ /* FIXME: Better announcement of merged routes */
+ rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
+
if (free_old_best)
rte_free_quick(old_best);
@@ -1755,6 +1944,8 @@ do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
rte_update_lock();
if (type == RA_ACCEPTED)
rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
+ else if (type == RA_MERGED)
+ rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
else
rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
rte_update_unlock();
@@ -1802,7 +1993,8 @@ again:
/* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
if ((p->accept_ra_types == RA_OPTIMAL) ||
- (p->accept_ra_types == RA_ACCEPTED))
+ (p->accept_ra_types == RA_ACCEPTED) ||
+ (p->accept_ra_types == RA_MERGED))
if (rte_is_valid(e))
{
if (p->export_state != ES_FEEDING)
@@ -2267,12 +2459,22 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
rte_update_lock(); /* We use the update buffer for filtering */
tmpa = make_tmp_attrs(e, rte_update_pool);
- if (d->export_mode)
+ /* Special case for merged export */
+ if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
+ {
+ rte *rt_free;
+ e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
+ pass = 1;
+
+ if (!e)
+ { e = ee; goto skip; }
+ }
+ else if (d->export_mode)
{
struct proto *ep = d->export_protocol;
int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
- if (ep->accept_ra_types == RA_OPTIMAL)
+ if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
pass = 1;
if (ic < 0)
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index d56c017d..d85afa8f 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -1312,6 +1312,82 @@ bgp_rte_better(rte *new, rte *old)
}
+int /* Decide whether BGP route @sec could be merged with @pri (1=yes, 0=no) */
+bgp_rte_mergable(rte *pri, rte *sec)
+{
+ struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
+ struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
+ eattr *x, *y;
+ u32 p, s;
+
+ /* Skip suppressed routes (see bgp_rte_recalculate()) */
+ if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
+ return 0;
+
+ /* RFC 4271 9.1.2.1. Route resolvability test */
+ if (!rte_resolvable(sec)) /* only @sec is tested here */
+ return 0;
+
+ /* Start with local preferences */
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
+ p = x ? x->u.data : pri_bgp->cf->default_local_pref;
+ s = y ? y->u.data : sec_bgp->cf->default_local_pref;
+ if (p != s)
+ return 0;
+
+ /* RFC 4271 9.1.2.2. a) Use AS path lengths */
+ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
+ {
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
+ s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
+
+ if (p != s) /* path lengths must be exactly equal */
+ return 0;
+
+// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
+// return 0;
+ }
+
+ /* RFC 4271 9.1.2.2. b) Use origins */
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
+ p = x ? x->u.data : ORIGIN_INCOMPLETE;
+ s = y ? y->u.data : ORIGIN_INCOMPLETE;
+ if (p != s)
+ return 0;
+
+ /* RFC 4271 9.1.2.2. c) Compare MED's */
+ if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
+ (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
+ {
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ p = x ? x->u.data : pri_bgp->cf->default_med;
+ s = y ? y->u.data : sec_bgp->cf->default_med;
+ if (p != s)
+ return 0;
+ }
+
+ /* RFC 4271 9.1.2.2. d) Prefer external peers */
+ if (pri_bgp->is_internal != sec_bgp->is_internal) /* both must be internal or both external */
+ return 0;
+
+ /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
+ p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
+ s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
+ if (p != s)
+ return 0;
+
+ /* Remaining criteria are ignored */
+
+ return 1;
+}
+
+
+
static inline int
same_group(rte *r, u32 lpref, u32 lasn)
{
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index e48b643b..9e28b278 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -1243,6 +1243,7 @@ bgp_init(struct proto_config *C)
P->feed_begin = bgp_feed_begin;
P->feed_end = bgp_feed_end;
P->rte_better = bgp_rte_better;
+ P->rte_mergable = bgp_rte_mergable;
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
p->cf = c;
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 446fc857..b6e80fe5 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -238,6 +238,7 @@ byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned att
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
int bgp_rte_better(struct rte *, struct rte *);
+int bgp_rte_mergable(rte *pri, rte *sec);
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
diff --git a/proto/static/static.c b/proto/static/static.c
index 4b72fa9d..e7e7ab15 100644
--- a/proto/static/static.c
+++ b/proto/static/static.c
@@ -352,6 +352,12 @@ static_if_notify(struct proto *p, unsigned flags, struct iface *i)
}
}
+int /* rte_mergable hook of the static protocol: always reports the routes as mergable */
+static_rte_mergable(rte *pri, rte *sec) /* NOTE(review): both parameters are unused; consider marking them UNUSED as done elsewhere in the project */
+{
+ return 1;
+}
+
void
static_init_config(struct static_config *c)
{
@@ -366,6 +372,7 @@ static_init(struct proto_config *c)
p->neigh_notify = static_neigh_notify;
p->if_notify = static_if_notify;
+ p->rte_mergable = static_rte_mergable;
return p;
}
diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y
index 630cda38..e036081d 100644
--- a/sysdep/unix/krt.Y
+++ b/sysdep/unix/krt.Y
@@ -17,7 +17,7 @@ CF_DEFINES
CF_DECLS
-CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC)
+CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS)
CF_GRAMMAR
@@ -47,6 +47,8 @@ kern_item:
}
| DEVICE ROUTES bool { THIS_KRT->devroutes = $3; }
| GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; }
+ | MERGE PATHS bool { THIS_KRT->merge_paths = $3 ? KRT_DEFAULT_ECMP_LIMIT : 0; }
+ | MERGE PATHS bool LIMIT expr { THIS_KRT->merge_paths = $3 ? $5 : 0; if (($5 <= 0) || ($5 > 255)) cf_error("Merge paths limit must be in range 1-255"); }
;
/* Kernel interface protocol */
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index d8d28c7c..2eab5cb2 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -595,9 +595,13 @@ krt_flush_routes(struct krt_proto *p)
static struct rte *
krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa)
{
- struct filter *filter = p->p.main_ahook->out_filter;
+ struct announce_hook *ah = p->p.main_ahook;
+ struct filter *filter = ah->out_filter;
rte *rt;
+ if (p->p.accept_ra_types == RA_MERGED)
+ return rt_export_merged(ah, net, rt_free, tmpa, 1);
+
rt = net->routes;
*rt_free = NULL;
@@ -1091,11 +1095,13 @@ krt_rte_same(rte *a, rte *b)
struct krt_config *krt_cf;
static struct proto *
-krt_init(struct proto_config *c)
+krt_init(struct proto_config *C)
{
- struct krt_proto *p = proto_new(c, sizeof(struct krt_proto));
+ struct krt_proto *p = proto_new(C, sizeof(struct krt_proto));
+ struct krt_config *c = (struct krt_config *) C;
- p->p.accept_ra_types = RA_OPTIMAL;
+ p->p.accept_ra_types = c->merge_paths ? RA_MERGED : RA_OPTIMAL;
+ p->p.merge_limit = c->merge_paths;
p->p.import_control = krt_import_control;
p->p.rt_notify = krt_rt_notify;
p->p.if_notify = krt_if_notify;
@@ -1161,7 +1167,8 @@ krt_reconfigure(struct proto *p, struct proto_config *new)
return 0;
/* persist, graceful restart need not be the same */
- return o->scan_time == n->scan_time && o->learn == n->learn && o->devroutes == n->devroutes;
+ return o->scan_time == n->scan_time && o->learn == n->learn &&
+ o->devroutes == n->devroutes && o->merge_paths == n->merge_paths;
}
static void
diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h
index 1940cbcd..9d5d4e8c 100644
--- a/sysdep/unix/krt.h
+++ b/sysdep/unix/krt.h
@@ -26,6 +26,8 @@ struct kif_proto;
#define KRF_DELETE 3 /* Should be deleted */
#define KRF_IGNORE 4 /* To be ignored */
+#define KRT_DEFAULT_ECMP_LIMIT 16
+
#define EA_KRT_SOURCE EA_CODE(EAP_KRT, 0)
#define EA_KRT_METRIC EA_CODE(EAP_KRT, 1)
@@ -47,6 +49,7 @@ struct krt_config {
int learn; /* Learn routes from other sources */
int devroutes; /* Allow export of device routes */
int graceful_restart; /* Regard graceful restart recovery */
+ int merge_paths; /* Exported routes are merged for ECMP */
};
struct krt_proto {