BGP: AIGP metric support (RFC 7311)

This commit is contained in:
Ondrej Zajicek (work) 2019-09-28 14:17:20 +02:00
parent 759b204be3
commit 09ee846d92
8 changed files with 359 additions and 16 deletions

View file

@ -2161,6 +2161,7 @@ avoid routing loops.
<item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
<item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
<item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
<item> <rfc id="7311"> - Accumulated IGP Metric Attribute for BGP
<item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
<item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
<item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
@ -2739,6 +2740,36 @@ be used in explicit configuration.
TX direction. When active, all available routes accepted by the export
filter are advertised to the neighbor. Default: off.
<tag><label id="bgp-aigp">aigp <m/switch/|originate</tag>
The BGP protocol does not use a common metric like other routing
protocols; instead, it uses a set of criteria for route selection
consisting of both the overall AS path length and the distance to the nearest AS
boundary router. Assuming that metrics of different autonomous systems
are incomparable, once a route is propagated from an AS to a next one,
the distance in the old AS does not matter.
The AIGP extension (<rfc id="7311">) allows propagating the accumulated
IGP metric (in the AIGP attribute) through both IBGP and EBGP links,
computing the total distance through multiple autonomous systems (assuming
they use comparable IGP metrics). The total AIGP metric is compared in
the route selection process just after Local Preference comparison (and
before AS path length comparison).
This option controls whether AIGP attribute propagation is allowed on
the session. Optionally, it can be set to <cf/originate/, which not only
allows AIGP attribute propagation, but also automatically attaches new
AIGP attributes to non-BGP routes with a valid IGP metric (e.g.
<cf/ospf_metric1/) as they are exported to the BGP session. Default:
enabled for IBGP (and intra-confederation EBGP), disabled for regular
EBGP.
<tag><label id="bgp-cost">cost <m/number/</tag>
When BGP <ref id="bgp-gateway" name="gateway mode"> is <cf/recursive/
(mainly multihop IBGP sessions), the distance to the BGP next hop is
based on the underlying IGP metric. This option specifies the distance to
the BGP next hop for BGP sessions in direct gateway mode (mainly direct
EBGP sessions).
<tag><label id="bgp-graceful-restart-c">graceful restart <m/switch/</tag>
Although BGP graceful restart is configured mainly by protocol-wide
<ref id="bgp-graceful-restart" name="options">, it is possible to
@ -2807,9 +2838,11 @@ some of them (marked with `<tt/O/') are optional.
presence of which indicates that the route has been aggregated from
multiple routes by some router on the path from the originator.
<!-- we don't handle aggregators right since they are of a very obscure type
<tag>bgp_aggregator</tag>
-->
<tag><label id="rta-bgp-aggregator">void bgp_aggregator [O]</tag>
This is an optional attribute specifying AS number and IP address of the
BGP router that created the route by aggregating multiple BGP routes.
Currently, the attribute is not accessible from filters.
<tag><label id="rta-bgp-community">clist bgp_community [O]</tag>
List of community values associated with the route. Each such value is a
pair (represented as a <cf/pair/ data type inside the filters) of 16-bit
@ -2844,6 +2877,11 @@ some of them (marked with `<tt/O/') are optional.
<tag><label id="rta-bgp-cluster-list">clist bgp_cluster_list [I, O]</tag>
This attribute contains a list of cluster IDs of route reflectors. Each
route reflector prepends its cluster ID when reflecting the route.
<tag><label id="rta-bgp-aigp">void bgp_aigp [O]</tag>
This attribute contains the accumulated IGP metric, which is the total
distance to the destination through multiple autonomous systems.
Currently, the attribute is not accessible from filters.
</descrip>
<sect1>Example

View file

@ -652,6 +652,7 @@ void rta_dump(rta *);
void rta_dump_all(void);
void rta_show(struct cli *, rta *);
u32 rt_get_igp_metric(rte *rt);
struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep);
void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls);

View file

@ -44,6 +44,10 @@
#include "lib/string.h"
#include "lib/alloca.h"
#ifdef CONFIG_BGP
#include "proto/bgp/bgp.h"
#endif
pool *rt_table_pool;
static slab *rte_slab;
@ -2934,7 +2938,7 @@ if_local_addr(ip_addr a, struct iface *i)
return 0;
}
static u32
u32
rt_get_igp_metric(rte *rt)
{
eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
@ -2956,6 +2960,14 @@ rt_get_igp_metric(rte *rt)
return rt->u.rip.metric;
#endif
#ifdef CONFIG_BGP
if (a->source == RTS_BGP)
{
u64 metric = bgp_total_aigp_metric(rt);
return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
}
#endif
if (a->source == RTS_DEVICE)
return 0;

View file

@ -34,7 +34,7 @@
* are probably inadequate.
*
* Loop detection based on AS_PATH causes updates to be withdrawn. RFC
* 4271 does not explicitly specifiy the behavior in that case.
* 4271 does not explicitly specify the behavior in that case.
*
* Loop detection related to route reflection (based on ORIGINATOR_ID
* and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
@ -199,6 +199,179 @@ bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
}
/*
* AIGP handling
*/
/*
 * Check that an AIGP attribute body is a well-formed sequence of TLVs.
 *
 * Every TLV starts with a type byte followed by a 16-bit length read by
 * get_u16(); the cursor is moved forward by that length, so the length is
 * expected to cover the 3-byte header as well. A TLV is rejected when fewer
 * than 3 bytes remain, when its length exceeds the remaining data, when its
 * length is below 3, or when it is a BGP_AIGP_METRIC TLV whose length is not
 * exactly 11.
 *
 * @data: start of the attribute body
 * @len:  number of bytes in @data
 * @err:  optional buffer for an error description (may be NULL)
 * @elen: size of @err
 *
 * Returns 1 when all TLVs pass, 0 otherwise. On failure with non-NULL @err,
 * the description, the offending value and the offset of the bad TLV are
 * formatted into @err; if bsnprintf() reports a negative result, @err is set
 * to an empty string instead.
 */
static int
bgp_aigp_valid(byte *data, uint len, char *err, uint elen)
{
  byte *cur = data;
  char *why = NULL;
  uint bad_val = 0;

  while (len)
  {
    /* Not even the type + length header fits */
    if (len < 3)
    {
      why = "TLV framing error";
      bad_val = len;
      break;
    }

    uint tlv_type = cur[0];
    uint tlv_len = get_u16(cur + 1);

    /* The checks are ordered; the first failing one determines the message */
    if (len < tlv_len)
      why = "TLV framing error";
    else if (tlv_len < 3)
      why = "Bad TLV length";
    else if ((tlv_type == BGP_AIGP_METRIC) && (tlv_len != 11))
      why = "Bad AIGP TLV length";

    if (why)
    {
      bad_val = tlv_len;
      break;
    }

    ADVANCE(cur, len, tlv_len);
  }

  if (!why)
    return 1;

  /* cur still points at the TLV that failed, so (cur - data) is its offset */
  if (err && (bsnprintf(err, elen, "%s (%u) at %d", why, bad_val, (int) (cur - data)) < 0))
    err[0] = 0;

  return 0;
}
/*
 * Find the first TLV of the given type inside an AIGP attribute.
 *
 * @ad:   attribute data holding a sequence of TLVs, or NULL
 * @type: TLV type to look for
 *
 * Returns a pointer to the start of the matching TLV (its type byte), or
 * NULL when @ad is NULL, no TLV of that type is present, or the TLV framing
 * is broken.
 *
 * The framing checks are defensive: a TLV whose length field is smaller than
 * its own 3-byte header, or larger than the remaining data, would otherwise
 * make the walk loop forever, underflow the remaining length, or read past
 * the end of the buffer. For well-formed data the result is unchanged.
 */
static const byte *
bgp_aigp_get_tlv(const struct adata *ad, uint type)
{
  if (!ad)
    return NULL;

  uint len = ad->length;
  const byte *pos = ad->data;

  /* A TLV header (type byte + 16-bit length) needs 3 bytes */
  while (len >= 3)
  {
    uint ptype = pos[0];
    uint plen = get_u16(pos + 1);

    /* Broken framing - give up rather than walk outside the buffer */
    if ((plen < 3) || (plen > len))
      return NULL;

    if (ptype == type)
      return pos;

    ADVANCE(pos, len, plen);
  }

  return NULL;
}
/*
 * Build a copy of an AIGP attribute with one TLV set to a new value.
 *
 * @pool: linpool the result is allocated from
 * @ad:   existing attribute data (sequence of TLVs), or NULL for none
 * @type: type of the TLV to store
 * @data: value of the new TLV
 * @dlen: length of @data; the stored TLV length is 3 + @dlen
 *
 * The new TLV is placed in front of the first existing TLV whose type is
 * greater than or equal to @type, or at the end when there is none. The
 * first existing TLV of the same type is dropped; all other TLVs are copied
 * unchanged. The input is not modified and its framing is trusted.
 *
 * Returns the newly allocated attribute data.
 */
static const struct adata *
bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen)
{
  const uint tlv_size = 3 + dlen;
  uint left = ad ? ad->length : 0;
  const byte *src = ad ? ad->data : NULL;

  /* Upper bound: everything from the input plus the new TLV */
  struct adata *out = lp_alloc_adata(pool, left + tlv_size);
  byte *wr = out->data;
  byte *slot = NULL;
  int dropped = 0;

  while (left)
  {
    uint cur_type = src[0];
    uint cur_len = get_u16(src + 1);

    /* Reserve room for the new TLV before the first TLV of equal or higher type */
    if (!slot && (cur_type >= type))
    {
      slot = wr;
      wr += tlv_size;
    }

    /* Copy everything except the first TLV of the requested type */
    if (dropped || (cur_type != type))
    {
      memcpy(wr, src, cur_len);
      wr += cur_len;
    }
    else
      dropped = 1;

    ADVANCE(src, left, cur_len);
  }

  /* No suitable position found - append at the end */
  if (!slot)
  {
    slot = wr;
    wr += tlv_size;
  }

  /* Fill in the new TLV: type, length, value */
  put_u8(slot, type);
  put_u16(slot + 1, tlv_size);
  memcpy(slot + 3, data, dlen);

  /* Actual length may be below the allocated bound when a TLV was dropped */
  out->length = wr - out->data;

  return out;
}
/*
 * Read the 64-bit value of the BGP_AIGP_METRIC TLV from AIGP attribute data.
 * Returns @def when bgp_aigp_get_tlv() finds no such TLV.
 */
static u64 UNUSED
bgp_aigp_get_metric(const struct adata *ad, u64 def)
{
  const byte *tlv = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC);

  if (!tlv)
    return def;

  /* The value follows the 3-byte TLV header */
  return get_u64(tlv + 3);
}
/*
 * Return AIGP attribute data based on @ad (may be NULL) with the
 * BGP_AIGP_METRIC TLV set to @metric. The value is serialized by put_u64()
 * into 8 bytes and stored through bgp_aigp_set_tlv(); the result is
 * allocated from @pool.
 */
static const struct adata *
bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
{
  byte value[8];

  put_u64(value, metric);

  return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, value, sizeof(value));
}
/*
 * Compute the total AIGP metric of a route: the value stored in its AIGP
 * attribute plus the local step taken from the route's igp_metric.
 *
 * @e:      route to examine
 * @metric: set to the total metric on success
 * @ad:     set to the data of the route's AIGP attribute on success
 *
 * The step is BGP_AIGP_MAX when the route is not resolvable or its
 * igp_metric is IGP_METRIC_UNKNOWN or higher, and a zero igp_metric counts
 * as 1. The sum saturates at BGP_AIGP_MAX instead of wrapping around.
 *
 * Returns 1 on success; returns 0 and leaves @metric and @ad untouched when
 * the route has no AIGP attribute or the attribute has no BGP_AIGP_METRIC
 * TLV.
 */
int
bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
{
  eattr *attr = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
  const byte *tlv = attr ? bgp_aigp_get_tlv(attr->u.ptr, BGP_AIGP_METRIC) : NULL;

  if (!tlv)
    return 0;

  u64 base = get_u64(tlv + 3);
  u64 hop = e->attrs->igp_metric;

  if (!rte_resolvable(e) || (hop >= IGP_METRIC_UNKNOWN))
    hop = BGP_AIGP_MAX;
  else if (hop == 0)
    hop = 1;

  /* Unsigned overflow shows up as a sum smaller than its operand */
  u64 total = base + hop;
  if (total < base)
    total = BGP_AIGP_MAX;

  *ad = attr->u.ptr;
  *metric = total;

  return 1;
}
/*
 * Prepare an initial AIGP metric for a route that does not come from BGP.
 *
 * For a route whose source is RTS_BGP nothing is written and 0 is returned.
 * Otherwise @metric is set to rt_get_igp_metric(@e), @ad is set to NULL and
 * the result is 1 when that metric is below IGP_METRIC_UNKNOWN, 0 otherwise.
 */
static inline int
bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
{
  if (e->attrs->source != RTS_BGP)
  {
    /* No existing AIGP attribute data to build on */
    *ad = NULL;
    *metric = rt_get_igp_metric(e);

    return *metric < IGP_METRIC_UNKNOWN;
  }

  return 0;
}
/*
* Attribute hooks
*/
@ -604,6 +777,42 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt
bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
}
/*
 * Export hook for the AIGP attribute: when the aigp option of the channel
 * configuration is off, the attribute is passed to UNSET() (presumably
 * removing it from the exported attribute set - confirm against the macro).
 */
static void
bgp_export_aigp(struct bgp_export_state *s, eattr *a)
{
  if (s->channel->cf->aigp)
    return;

  UNSET(a);
}
/*
 * Decode hook for the AIGP attribute.
 *
 * The OPTIONAL/TRANSITIVE flag bits are checked here against the attribute
 * table entry, and the TLV structure is checked by bgp_aigp_valid(). On
 * either failure DISCARD() is invoked with a message (presumably reporting
 * the problem and dropping just this attribute - confirm against the macro).
 * Otherwise the raw attribute body is stored as BA_AIGP in @to.
 *
 * Whether AIGP is acceptable on the session at all is not tested here; that
 * is postponed to bgp_finish_attrs().
 */
static void
bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
  char reason[128];
  uint flag_diff = flags ^ bgp_attr_table[BA_AIGP].flags;

  if (flag_diff & (BAF_OPTIONAL | BAF_TRANSITIVE))
    DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags);

  if (!bgp_aigp_valid(data, len, reason, sizeof(reason)))
    DISCARD("Malformed AIGP attribute - %s", reason);

  bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
}
/*
 * Format hook for the AIGP attribute: prints the value of the
 * BGP_AIGP_METRIC TLV into @buf, or "?" when the attribute has no such TLV.
 */
static void
bgp_format_aigp(eattr *a, byte *buf, uint size UNUSED)
{
  const byte *tlv = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);

  if (tlv)
    bsprintf(buf, "%lu", get_u64(tlv + 3));
  else
    bsprintf(buf, "?");
}
static void
bgp_export_large_community(struct bgp_export_state *s, eattr *a)
{
@ -820,6 +1029,15 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.decode = bgp_decode_as4_aggregator,
.format = bgp_format_aggregator,
},
[BA_AIGP] = {
.name = "aigp",
.type = EAF_TYPE_OPAQUE,
.flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
.export = bgp_export_aigp,
.encode = bgp_encode_raw,
.decode = bgp_decode_aigp,
.format = bgp_format_aigp,
},
[BA_LARGE_COMMUNITY] = {
.name = "large_community",
.type = EAF_TYPE_LC_SET,
@ -1021,7 +1239,8 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui
const struct bgp_attr_desc *desc = &bgp_attr_table[code];
/* Handle conflicting flags; RFC 7606 3 (c) */
if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
!(desc->flags & BAF_DECODE_FLAGS))
WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
desc->decode(s, code, flags, data, len, to);
@ -1150,6 +1369,17 @@ withdraw:
return NULL;
}
/*
 * Final attribute processing that needs to know the channel.
 *
 * When an AIGP attribute was seen in the update but the aigp option of the
 * channel configuration is off, the fact is reported and the attribute is
 * removed from the route attributes @a. This test lives here rather than in
 * bgp_decode_aigp() because it depends on the channel.
 */
void
bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
{
  if (!BIT32_TEST(s->attrs_seen, BA_AIGP) || s->channel->cf->aigp)
    return;

  REPORT("Discarding AIGP attribute received on non-AIGP session");
  bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
}
/*
* Route bucket hash table
@ -1481,6 +1711,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
/* AIGP attribute - accumulate local metric or originate new one */
u64 metric;
if (s.local_next_hop &&
(bgp_total_aigp_metric_(e, &metric, &ad) ||
(c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
{
ad = bgp_aigp_set_metric(pool, ad, metric);
bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
}
/* IBGP route reflection, RFC 4456 */
if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
{
@ -1578,12 +1818,6 @@ bgp_get_neighbor(rte *r)
return p->cf->confederation ?: p->local_as;
}
/* Return 1 when the destination type of the route is RTD_UNICAST, 0 otherwise */
static inline int
rte_resolvable(rte *rt)
{
  if (rt->attrs->dest != RTD_UNICAST)
    return 0;

  return 1;
}
static inline int
rte_stale(rte *r)
{
@ -1639,6 +1873,14 @@ bgp_rte_better(rte *new, rte *old)
if (n < o)
return 0;
/* RFC 7311 4.1 - Apply AIGP metric */
u64 n2 = bgp_total_aigp_metric(new);
u64 o2 = bgp_total_aigp_metric(old);
if (n2 < o2)
return 1;
if (n2 > o2)
return 0;
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
{
@ -2062,7 +2304,12 @@ bgp_get_route_info(rte *e, byte *buf)
if (rte_stale(e))
buf += bsprintf(buf, "s");
if (e->attrs->hostentry)
u64 metric = bgp_total_aigp_metric(e);
if (metric < BGP_AIGP_MAX)
{
buf += bsprintf(buf, "/%lu", metric);
}
else if (e->attrs->igp_metric)
{
if (!rte_resolvable(e))
buf += bsprintf(buf, "/-");

View file

@ -92,6 +92,7 @@
* RFC 6286 - AS-Wide Unique BGP Identifier
* RFC 6608 - Subcodes for BGP Finite State Machine Error
* RFC 6793 - BGP Support for 4-Octet AS Numbers
* RFC 7311 - Accumulated IGP Metric Attribute for BGP
* RFC 7313 - Enhanced Route Refresh Capability for BGP
* RFC 7606 - Revised Error Handling for BGP UPDATE Messages
* RFC 7911 - Advertisement of Multiple Paths in BGP
@ -1979,6 +1980,10 @@ bgp_postconfig(struct proto_config *CF)
if (cc->llgr_time == ~0U)
cc->llgr_time = cf->llgr_time;
/* AIGP enabled by default on interior sessions */
if (cc->aigp == 0xff)
cc->aigp = interior;
/* Default values of IGP tables */
if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
{
@ -2087,13 +2092,17 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor
if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP))
return 0;
if (new->gw_mode != old->gw_mode)
if ((new->gw_mode != old->gw_mode) ||
(new->aigp != old->aigp) ||
(new->cost != old->cost))
*import_changed = 1;
if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
(new->next_hop_self != old->next_hop_self) ||
(new->next_hop_keep != old->next_hop_keep) ||
(new->missing_lladdr != old->missing_lladdr))
(new->missing_lladdr != old->missing_lladdr) ||
(new->aigp != old->aigp) ||
(new->aigp_originate != old->aigp_originate))
*export_changed = 1;
c->cf = new;

View file

@ -149,6 +149,9 @@ struct bgp_channel_config {
uint llgr_time; /* Long-lived graceful restart stale time */
u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */
u8 add_path; /* Use ADD-PATH extension [RFC 7911] */
u8 aigp; /* AIGP is allowed on this session */
u8 aigp_originate; /* AIGP is originated automatically */
u32 cost; /* IGP cost for direct next hops */
u8 import_table; /* Use c.in_table as Adj-RIB-In */
u8 export_table; /* Use c.out_table as Adj-RIB-Out */
@ -379,6 +382,7 @@ struct bgp_export_state {
u32 attrs_seen[1];
uint err_withdraw;
uint local_next_hop;
};
struct bgp_write_state {
@ -493,6 +497,11 @@ void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len);
struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
/* Return 1 when the destination type of the route is RTD_UNICAST, 0 otherwise */
static inline int
rte_resolvable(rte *rt)
{
  if (rt->attrs->dest != RTD_UNICAST)
    return 0;

  return 1;
}
#ifdef LOCAL_DEBUG
@ -541,6 +550,7 @@ bgp_unset_attr(ea_list **to, struct linpool *pool, uint code)
int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end);
ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len);
void bgp_finish_attrs(struct bgp_parse_state *s, rta *a);
void bgp_init_bucket_table(struct bgp_channel *c);
void bgp_free_bucket_table(struct bgp_channel *c);
@ -560,6 +570,20 @@ void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *ol
int bgp_preexport(struct proto *, struct rte **, struct linpool *);
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
void bgp_get_route_info(struct rte *, byte *buf);
int bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad);
#define BGP_AIGP_METRIC 1
#define BGP_AIGP_MAX U64(0xffffffffffffffff)
/*
 * Convenience wrapper around bgp_total_aigp_metric_(): returns the total
 * AIGP metric of route @r, or BGP_AIGP_MAX when that function does not
 * provide one (it leaves the preset value untouched in that case).
 */
static inline u64
bgp_total_aigp_metric(rte *r)
{
  const struct adata *ignored;	/* Required by the callee, not used here */
  u64 total = BGP_AIGP_MAX;

  bgp_total_aigp_metric_(r, &total, &ignored);

  return total;
}
/* packets.c */
@ -595,6 +619,8 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define BAF_PARTIAL 0x20
#define BAF_EXT_LEN 0x10
#define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */
#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */
#define BA_AS_PATH 0x02 /* WM */
#define BA_NEXT_HOP 0x03 /* WM */
@ -610,6 +636,7 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */
#define BA_AS4_PATH 0x11 /* RFC 6793 */
#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */
#define BA_AIGP 0x1a /* RFC 7311 */
#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */
/* Bird's private internal BGP attributes */

View file

@ -30,7 +30,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL,
DYNAMIC, RANGE, NAME, DIGITS)
DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST)
%type <i> bgp_nh
%type <i32> bgp_afi
@ -227,6 +227,7 @@ bgp_channel_start: bgp_afi
BGP_CC->gr_able = 0xff; /* undefined */
BGP_CC->llgr_able = 0xff; /* undefined */
BGP_CC->llgr_time = ~0U; /* undefined */
BGP_CC->aigp = 0xff; /* undefined */
}
};
@ -256,6 +257,9 @@ bgp_channel_item:
| ADD PATHS bool { BGP_CC->add_path = $3 ? BGP_ADD_PATH_FULL : 0; }
| IMPORT TABLE bool { BGP_CC->import_table = $3; }
| EXPORT TABLE bool { BGP_CC->export_table = $3; }
| AIGP bool { BGP_CC->aigp = $2; BGP_CC->aigp_originate = 0; }
| AIGP ORIGINATE { BGP_CC->aigp = 1; BGP_CC->aigp_originate = 1; }
| COST expr { BGP_CC->cost = $2; if ($2 < 1) cf_error("Cost must be positive"); }
| IGP TABLE rtable {
if (BGP_CC->desc->no_igp)
cf_error("IGP table not allowed here");
@ -312,6 +316,8 @@ dynamic_attr: BGP_CLUSTER_LIST
{ $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ;
dynamic_attr: BGP_EXT_COMMUNITY
{ $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ;
dynamic_attr: BGP_AIGP
{ $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ;
dynamic_attr: BGP_LARGE_COMMUNITY
{ $$ = f_new_dynamic_attr(EAF_TYPE_LC_SET, T_LCLIST, EA_CODE(PROTOCOL_BGP, BA_LARGE_COMMUNITY)); } ;

View file

@ -919,6 +919,7 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
a->dest = RTD_UNICAST;
a->nh.gw = nbr->addr;
a->nh.iface = nbr->iface;
a->igp_metric = c->cf->cost;
}
else /* GW_RECURSIVE */
{
@ -1063,6 +1064,7 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
{
ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
s->local_next_hop = 1;
/* TODO: Use local MPLS assigned label */
if (s->mpls)
@ -2401,6 +2403,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
a->eattrs = ea;
c->desc->decode_next_hop(s, nh, nh_len, a);
bgp_finish_attrs(s, a);
/* Handle withdraw during next hop decoding */
if (s->err_withdraw)