bird/proto/bgp/attrs.c
Maria Jan Matejka 026bfedb33 BGP: Prefix hash is too small, increase its max size.
This doesn't make any change for you until you have
millions of updates waiting to be sent. Increasing
the max hash size from 2^20 to 2^24.
2019-07-01 09:05:54 +02:00

2086 lines
57 KiB
C

/*
* BIRD -- BGP Attributes
*
* (c) 2000 Martin Mares <mj@ucw.cz>
* (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2008--2016 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#undef LOCAL_DEBUG
#include <stdlib.h>
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
#include "nest/attrs.h"
#include "conf/conf.h"
#include "lib/resource.h"
#include "lib/string.h"
#include "lib/unaligned.h"
#include "bgp.h"
/*
* UPDATE message error handling
*
* All checks from RFC 4271 6.3 are done as specified with these exceptions:
* - The semantic check of an IP address from NEXT_HOP attribute is missing.
* - Checks of some optional attribute values are missing.
* - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
* are probably inadequate.
*
* Loop detection based on AS_PATH causes updates to be withdrawn. RFC
* 4271 does not explicitly specifiy the behavior in that case.
*
* Loop detection related to route reflection (based on ORIGINATOR_ID
* and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
* specifies that such updates should be ignored, but that is generally
* a bad idea.
*
* BGP attribute table has several hooks:
*
* export - Hook that validates and normalizes attribute during export phase.
* Receives eattr, may modify it (e.g., sort community lists for canonical
* representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
* necessary. May assume that eattr has value valid w.r.t. its type, but may be
* invalid w.r.t. BGP constraints. Optional.
*
* encode - Hook that converts internal representation to external one during
* packet writing. Receives eattr and puts it in the buffer (including attribute
* header). Returns number of bytes, or -1 if not enough space. May assume that
* eattr has value valid w.r.t. its type and validated by export hook. Mandatory
* for all known attributes that exist internally after export phase (i.e., all
* except pseudoattributes MP_(UN)REACH_NLRI).
*
* decode - Hook that converts external representation to internal one during
* packet parsing. Receives attribute data in buffer, validates it and adds
* attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
* bgp_parse_error() may be used to escape. Mandatory for all known attributes.
*
* format - Optional hook that converts eattr to textual representation.
*/
struct bgp_attr_desc {
const char *name;
uint type;
uint flags;
void (*export)(struct bgp_export_state *s, eattr *a);
int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
void (*format)(eattr *ea, byte *buf, uint size);
};
static const struct bgp_attr_desc bgp_attr_table[];
static inline int bgp_attr_known(uint code);
eattr *
bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
{
ASSERT(bgp_attr_known(code));
return ea_set_attr(
attrs,
pool,
EA_CODE(PROTOCOL_BGP, code),
flags,
bgp_attr_table[code].type,
val
);
}
#define REPORT(msg, args...) \
({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
#define DISCARD(msg, args...) \
({ REPORT(msg, ## args); return; })
#define WITHDRAW(msg, args...) \
({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
#define UNSET(a) \
({ a->type = EAF_TYPE_UNDEF; return; })
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY "Missing mandatory %s attribute"
static inline int
bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
{
*buf++ = flags;
*buf++ = code;
*buf++ = len;
return 3;
}
static inline int
bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
{
*buf++ = flags | BAF_EXT_LEN;
*buf++ = code;
put_u16(buf, len);
return 4;
}
static inline int
bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
{
if (len < 256)
return bgp_put_attr_hdr3(buf, code, flags, len);
else
return bgp_put_attr_hdr4(buf, code, flags, len);
}
static int
bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
if (size < (3+1))
return -1;
bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
buf[3] = a->u.data;
return 3+1;
}
static int
bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
if (size < (3+4))
return -1;
bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
put_u32(buf+3, a->u.data);
return 3+4;
}
static int
bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
uint len = a->u.ptr->length;
if (size < (4+len))
return -1;
uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
return hdr + len;
}
static int
bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len)
{
if (size < (4+len))
return -1;
uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
memcpy(buf + hdr, data, len);
return hdr + len;
}
static int
bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
}
/*
* Attribute hooks
*/
static void
bgp_export_origin(struct bgp_export_state *s, eattr *a)
{
if (a->u.data > 2)
WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
}
static void
bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (len != 1)
WITHDRAW(BAD_LENGTH, "ORIGIN", len);
if (data[0] > 2)
WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
}
static void
bgp_format_origin(eattr *a, byte *buf, uint size UNUSED)
{
static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
}
static int
bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
byte *data = a->u.ptr->data;
uint len = a->u.ptr->length;
if (!s->as4_session)
{
/* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
byte *src = data;
data = alloca(len);
len = as_path_32to16(data, src, len);
}
return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
}
static void
bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
struct bgp_proto *p = s->proto;
int as_length = s->as4_session ? 4 : 2;
int as_confed = p->cf->confederation && p->is_interior;
char err[128];
if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err)))
WITHDRAW("Malformed AS_PATH attribute - %s", err);
/* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
if (p->is_interior && !p->is_internal &&
((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
if (!s->as4_session)
{
/* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
byte *src = data;
data = alloca(2*len);
len = as_path_16to32(data, src, len);
}
bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
}
static int
bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
/*
* The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
* the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
* store it and encode it later by AFI-specific hooks.
*/
if (!s->mp_reach)
{
// ASSERT(a->u.ptr->length == sizeof(ip_addr));
/* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
ip_addr *addr = (void *) a->u.ptr->data;
if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
return 0;
if (size < (3+4))
return -1;
bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
put_ip4(buf+3, ipa_to_ip4(*addr));
return 3+4;
}
else
{
s->mp_next_hop = a;
return 0;
}
}
static void
bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
{
if (len != 4)
WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
/* Semantic checks are done later */
s->ip_next_hop_len = len;
s->ip_next_hop_data = data;
}
/* TODO: This function should use AF-specific hook */
static void
bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED)
{
ip_addr *nh = (void *) a->u.ptr->data;
uint len = a->u.ptr->length;
ASSERT((len == 16) || (len == 32));
/* in IPv6, we may have two addresses in NEXT HOP */
if ((len == 16) || ipa_zero(nh[1]))
bsprintf(buf, "%I", nh[0]);
else
bsprintf(buf, "%I %I", nh[0], nh[1]);
}
static void
bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (len != 4)
WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
u32 val = get_u32(data);
bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
}
static void
bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
{
if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
UNSET(a);
}
static void
bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
DISCARD(BAD_EBGP, "LOCAL_PREF");
if (len != 4)
WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
u32 val = get_u32(data);
bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
}
static void
bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
{
if (len != 0)
DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
}
static int
bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
byte *data = a->u.ptr->data;
uint len = a->u.ptr->length;
if (!s->as4_session)
{
/* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
byte *src = data;
data = alloca(6);
len = aggregator_32to16(data, src);
}
return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
}
static void
bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (len != (s->as4_session ? 8 : 6))
DISCARD(BAD_LENGTH, "AGGREGATOR", len);
if (!s->as4_session)
{
/* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
byte *src = data;
data = alloca(8);
len = aggregator_16to32(data, src);
}
bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
}
static void
bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED)
{
byte *data = a->u.ptr->data;
bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
}
static void
bgp_export_community(struct bgp_export_state *s, eattr *a)
{
if (a->u.ptr->length == 0)
UNSET(a);
a->u.ptr = int_set_sort(s->pool, a->u.ptr);
}
static void
bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (!len || (len % 4))
WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
}
static void
bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
{
if (!s->proto->is_internal)
UNSET(a);
}
static void
bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (!s->proto->is_internal)
DISCARD(BAD_EBGP, "ORIGINATOR_ID");
if (len != 4)
WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
u32 val = get_u32(data);
bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
}
static void
bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
{
if (!s->proto->is_internal)
UNSET(a);
if (a->u.ptr->length == 0)
UNSET(a);
}
static void
bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (!s->proto->is_internal)
DISCARD(BAD_EBGP, "CLUSTER_LIST");
if (!len || (len % 4))
WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
}
static void
bgp_format_cluster_list(eattr *a, byte *buf, uint size)
{
/* Truncates cluster lists larger than buflen, probably not a problem */
int_set_format(a->u.ptr, 0, -1, buf, size);
}
static inline u32
get_af3(byte *buf)
{
return (get_u16(buf) << 16) | buf[2];
}
static void
bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
{
/*
* 2 B MP_REACH_NLRI data - Address Family Identifier
* 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
* 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
* var MP_REACH_NLRI data - Network Address of Next Hop
* 1 B MP_REACH_NLRI data - Reserved (zero)
* var MP_REACH_NLRI data - Network Layer Reachability Information
*/
if ((len < 5) || (len < (5 + (uint) data[3])))
bgp_parse_error(s, 9);
s->mp_reach_af = get_af3(data);
s->mp_next_hop_len = data[3];
s->mp_next_hop_data = data + 4;
s->mp_reach_len = len - 5 - s->mp_next_hop_len;
s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
}
static void
bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
{
/*
* 2 B MP_UNREACH_NLRI data - Address Family Identifier
* 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
* var MP_UNREACH_NLRI data - Network Layer Reachability Information
*/
if (len < 3)
bgp_parse_error(s, 9);
s->mp_unreach_af = get_af3(data);
s->mp_unreach_len = len - 3;
s->mp_unreach_nlri = data + 3;
}
static void
bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
{
a->u.ptr = ec_set_del_nontrans(s->pool, a->u.ptr);
if (a->u.ptr->length == 0)
UNSET(a);
ec_set_sort_x(a->u.ptr);
}
static void
bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (!len || (len % 8))
WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
}
static void
bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (s->as4_session)
DISCARD(NEW_BGP, "AS4_AGGREGATOR");
if (len != 8)
DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
}
static void
bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
char err[128];
if (s->as4_session)
DISCARD(NEW_BGP, "AS4_PATH");
if (len < 6)
DISCARD(BAD_LENGTH, "AS4_PATH", len);
if (!as_path_valid(data, len, 4, 1, err, sizeof(err)))
DISCARD("Malformed AS4_PATH attribute - %s", err);
struct adata *a = lp_alloc_adata(s->pool, len);
memcpy(a->data, data, len);
/* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
if (as_path_contains_confed(a))
{
REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
a = as_path_strip_confed(s->pool, a);
}
bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
}
static void
bgp_export_large_community(struct bgp_export_state *s, eattr *a)
{
if (a->u.ptr->length == 0)
UNSET(a);
a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
}
static void
bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
if (!len || (len % 12))
WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
}
static void
bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
{
net_addr *n = s->route->net->n.addr;
u32 *labels = (u32 *) a->u.ptr->data;
uint lnum = a->u.ptr->length / 4;
/* Perhaps we should just ignore it? */
if (!s->mpls)
WITHDRAW("Unexpected MPLS stack");
/* Empty MPLS stack is not allowed */
if (!lnum)
WITHDRAW("Malformed MPLS stack - empty");
/* This is ugly, but we must ensure that labels fit into NLRI field */
if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
for (uint i = 0; i < lnum; i++)
{
if (labels[i] > 0xfffff)
WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
/* TODO: Check for special-purpose label values? */
}
}
static int
bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
{
/*
* MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
* so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
*/
s->mpls_labels = a->u.ptr;
return 0;
}
static void
bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
{
DISCARD("Discarding received attribute #0");
}
static void
bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
{
u32 *labels = (u32 *) a->u.ptr->data;
uint lnum = a->u.ptr->length / 4;
char *pos = buf;
for (uint i = 0; i < lnum; i++)
{
if (size < 20)
{
bsprintf(pos, "...");
return;
}
uint l = bsprintf(pos, "%d/", labels[i]);
ADVANCE(pos, size, l);
}
/* Clear last slash or terminate empty string */
pos[lnum ? -1 : 0] = 0;
}
static inline void
bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
{
/* Cannot use bgp_set_attr_data() as it works on known attributes only */
ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
}
/*
* Attribute table
*/
static const struct bgp_attr_desc bgp_attr_table[] = {
[BA_ORIGIN] = {
.name = "origin",
.type = EAF_TYPE_INT,
.flags = BAF_TRANSITIVE,
.export = bgp_export_origin,
.encode = bgp_encode_u8,
.decode = bgp_decode_origin,
.format = bgp_format_origin,
},
[BA_AS_PATH] = {
.name = "as_path",
.type = EAF_TYPE_AS_PATH,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_as_path,
.decode = bgp_decode_as_path,
},
[BA_NEXT_HOP] = {
.name = "next_hop",
.type = EAF_TYPE_IP_ADDRESS,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_next_hop,
.decode = bgp_decode_next_hop,
.format = bgp_format_next_hop,
},
[BA_MULTI_EXIT_DISC] = {
.name = "med",
.type = EAF_TYPE_INT,
.flags = BAF_OPTIONAL,
.encode = bgp_encode_u32,
.decode = bgp_decode_med,
},
[BA_LOCAL_PREF] = {
.name = "local_pref",
.type = EAF_TYPE_INT,
.flags = BAF_TRANSITIVE,
.export = bgp_export_local_pref,
.encode = bgp_encode_u32,
.decode = bgp_decode_local_pref,
},
[BA_ATOMIC_AGGR] = {
.name = "atomic_aggr",
.type = EAF_TYPE_OPAQUE,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_atomic_aggr,
},
[BA_AGGREGATOR] = {
.name = "aggregator",
.type = EAF_TYPE_OPAQUE,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_aggregator,
.decode = bgp_decode_aggregator,
.format = bgp_format_aggregator,
},
[BA_COMMUNITY] = {
.name = "community",
.type = EAF_TYPE_INT_SET,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_community,
},
[BA_ORIGINATOR_ID] = {
.name = "originator_id",
.type = EAF_TYPE_ROUTER_ID,
.flags = BAF_OPTIONAL,
.export = bgp_export_originator_id,
.encode = bgp_encode_u32,
.decode = bgp_decode_originator_id,
},
[BA_CLUSTER_LIST] = {
.name = "cluster_list",
.type = EAF_TYPE_INT_SET,
.flags = BAF_OPTIONAL,
.export = bgp_export_cluster_list,
.encode = bgp_encode_u32s,
.decode = bgp_decode_cluster_list,
.format = bgp_format_cluster_list,
},
[BA_MP_REACH_NLRI] = {
.name = "mp_reach_nlri",
.type = EAF_TYPE_OPAQUE,
.flags = BAF_OPTIONAL,
.decode = bgp_decode_mp_reach_nlri,
},
[BA_MP_UNREACH_NLRI] = {
.name = "mp_unreach_nlri",
.type = EAF_TYPE_OPAQUE,
.flags = BAF_OPTIONAL,
.decode = bgp_decode_mp_unreach_nlri,
},
[BA_EXT_COMMUNITY] = {
.name = "ext_community",
.type = EAF_TYPE_EC_SET,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_ext_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_ext_community,
},
[BA_AS4_PATH] = {
.name = "as4_path",
.type = EAF_TYPE_AS_PATH,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_as4_path,
},
[BA_AS4_AGGREGATOR] = {
.name = "as4_aggregator",
.type = EAF_TYPE_OPAQUE,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_as4_aggregator,
.format = bgp_format_aggregator,
},
[BA_LARGE_COMMUNITY] = {
.name = "large_community",
.type = EAF_TYPE_LC_SET,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_large_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_large_community,
},
[BA_MPLS_LABEL_STACK] = {
.name = "mpls_label_stack",
.type = EAF_TYPE_INT_SET,
.export = bgp_export_mpls_label_stack,
.encode = bgp_encode_mpls_label_stack,
.decode = bgp_decode_mpls_label_stack,
.format = bgp_format_mpls_label_stack,
},
};
static inline int
bgp_attr_known(uint code)
{
return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
}
/*
* Attribute export
*/
static inline void
bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
{
if (EA_PROTO(a->id) != PROTOCOL_BGP)
return;
uint code = EA_ID(a->id);
if (bgp_attr_known(code))
{
const struct bgp_attr_desc *desc = &bgp_attr_table[code];
/* The flags might have been zero if the attr was added by filters */
a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
/* Set partial bit if new opt-trans attribute is attached to non-local route */
if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
(a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
a->flags |= BAF_PARTIAL;
/* Call specific hook */
CALL(desc->export, s, a);
/* Attribute might become undefined in hook */
if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
return;
}
else
{
/* Don't re-export unknown non-transitive attributes */
if (!(a->flags & BAF_TRANSITIVE))
return;
a->flags |= BAF_PARTIAL;
}
/* Append updated attribute */
to->attrs[to->count++] = *a;
}
/**
* bgp_export_attrs - export BGP attributes
* @s: BGP export state
* @attrs: a list of extended attributes
*
* The bgp_export_attrs() function takes a list of attributes and merges it to
* one newly allocated and sorted segment. Attributes are validated and
* normalized by type-specific export hooks and attribute flags are updated.
* Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
* empty community sets).
*
* Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
*/
static inline ea_list *
bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
{
/* Merge the attribute list */
ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
ea_merge(attrs, new);
ea_sort(new);
uint i, count;
count = new->count;
new->count = 0;
/* Export each attribute */
for (i = 0; i < count; i++)
bgp_export_attr(s, &new->attrs[i], new);
if (s->err_withdraw)
return NULL;
return new;
}
/*
* Attribute encoding
*/
static inline int
bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
uint code = EA_ID(a->id);
if (bgp_attr_known(code))
return bgp_attr_table[code].encode(s, a, buf, size);
else
return bgp_encode_raw(s, a, buf, size);
}
/**
* bgp_encode_attrs - encode BGP attributes
* @s: BGP write state
* @attrs: a list of extended attributes
* @buf: buffer
* @end: buffer end
*
* The bgp_encode_attrs() function takes a list of extended attributes
* and converts it to its BGP representation (a part of an Update message).
* BGP write state may be fake when called from MRT protocol.
*
* Result: Length of the attribute block generated or -1 if not enough space.
*/
int
bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
{
byte *pos = buf;
int i, len;
for (i = 0; i < attrs->count; i++)
{
len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
if (len < 0)
return -1;
pos += len;
}
return pos - buf;
}
/*
* Attribute decoding
*/
static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
static inline int
bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
{
eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
int num = p->cf->allow_local_as + 1;
return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
}
static inline int
bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
{
eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
return (e && (e->u.data == p->local_id));
}
static inline int
bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
{
eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
}
static inline void
bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
{
/* Handle duplicate attributes; RFC 7606 3 (g) */
if (BIT32_TEST(s->attrs_seen, code))
{
if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
bgp_parse_error(s, 1);
else
DISCARD("Discarding duplicate attribute (code %u)", code);
}
BIT32_SET(s->attrs_seen, code);
if (bgp_attr_known(code))
{
const struct bgp_attr_desc *desc = &bgp_attr_table[code];
/* Handle conflicting flags; RFC 7606 3 (c) */
if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
desc->decode(s, code, flags, data, len, to);
}
else /* Unknown attribute */
{
if (!(flags & BAF_OPTIONAL))
WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
bgp_decode_unknown(s, code, flags, data, len, to);
}
}
/**
* bgp_decode_attrs - check and decode BGP attributes
* @s: BGP parse state
* @data: start of attribute block
* @len: length of attribute block
*
* This function takes a BGP attribute block (a part of an Update message), checks
* its consistency and converts it to a list of BIRD route attributes represented
* by an (uncached) &rta.
*/
ea_list *
bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
{
struct bgp_proto *p = s->proto;
ea_list *attrs = NULL;
uint code, flags, alen;
byte *pos = data;
/* Parse the attributes */
while (len)
{
alen = 0;
/* Read attribute type */
if (len < 2)
goto framing_error;
flags = pos[0];
code = pos[1];
ADVANCE(pos, len, 2);
/* Read attribute length */
if (flags & BAF_EXT_LEN)
{
if (len < 2)
goto framing_error;
alen = get_u16(pos);
ADVANCE(pos, len, 2);
}
else
{
if (len < 1)
goto framing_error;
alen = *pos;
ADVANCE(pos, len, 1);
}
if (alen > len)
goto framing_error;
DBG("Attr %02x %02x %u\n", code, flags, alen);
bgp_decode_attr(s, code, flags, pos, alen, &attrs);
ADVANCE(pos, len, alen);
}
if (s->err_withdraw)
goto withdraw;
/* If there is no reachability NLRI, we are finished */
if (!s->ip_reach_len && !s->mp_reach_len)
return NULL;
/* Handle missing mandatory attributes; RFC 7606 3 (d) */
if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
{ REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
{ REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
{ REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
/* When receiving attributes from non-AS4-aware BGP speaker, we have to
reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
if (!p->as4_session)
bgp_process_as4_attrs(&attrs, s->pool);
/* Reject routes with our ASN in AS_PATH attribute */
if (bgp_as_path_loopy(p, attrs, p->local_as))
goto withdraw;
/* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
goto withdraw;
/* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
if (p->is_internal && bgp_originator_id_loopy(p, attrs))
goto withdraw;
/* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
goto withdraw;
/* If there is no local preference, define one */
if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
return attrs;
framing_error:
/* RFC 7606 4 - handle attribute framing errors */
REPORT("Malformed attribute list - framing error (%u/%u) at %d",
alen, len, (int) (pos - s->attrs));
withdraw:
/* RFC 7606 5.2 - handle missing NLRI during errors */
if (!s->ip_reach_len && !s->mp_reach_len)
bgp_parse_error(s, 1);
s->err_withdraw = 1;
return NULL;
}
/*
* Route bucket hash table
*/
#define RBH_KEY(b) b->eattrs, b->hash
#define RBH_NEXT(b) b->next
#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
#define RBH_FN(a,h) h
#define RBH_REHASH bgp_rbh_rehash
#define RBH_PARAMS /8, *2, 2, 2, 8, 20
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
void
bgp_init_bucket_table(struct bgp_channel *c)
{
HASH_INIT(c->bucket_hash, c->pool, 8);
init_list(&c->bucket_queue);
c->withdraw_bucket = NULL;
}
void
bgp_free_bucket_table(struct bgp_channel *c)
{
HASH_FREE(c->bucket_hash);
struct bgp_bucket *b;
WALK_LIST_FIRST(b, c->bucket_queue)
{
rem_node(&b->send_node);
mb_free(b);
}
mb_free(c->withdraw_bucket);
c->withdraw_bucket = NULL;
}
static struct bgp_bucket *
bgp_get_bucket(struct bgp_channel *c, ea_list *new)
{
/* Hash and lookup */
u32 hash = ea_hash(new);
struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
if (b)
return b;
uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
uint i;
byte *dest;
/* Gather total size of non-inline attributes */
for (i = 0; i < new->count; i++)
{
eattr *a = &new->attrs[i];
if (!(a->type & EAF_EMBEDDED))
size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
}
/* Create the bucket */
b = mb_alloc(c->pool, size);
init_list(&b->prefixes);
b->hash = hash;
/* Copy list of extended attributes */
memcpy(b->eattrs, new, ea_size);
dest = ((byte *) b->eattrs) + ea_size_aligned;
/* Copy values of non-inline attributes */
for (i = 0; i < new->count; i++)
{
eattr *a = &b->eattrs->attrs[i];
if (!(a->type & EAF_EMBEDDED))
{
struct adata *oa = a->u.ptr;
struct adata *na = (struct adata *) dest;
memcpy(na, oa, sizeof(struct adata) + oa->length);
a->u.ptr = na;
dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
}
}
/* Insert the bucket to send queue and bucket hash */
add_tail(&c->bucket_queue, &b->send_node);
HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
return b;
}
static struct bgp_bucket *
bgp_get_withdraw_bucket(struct bgp_channel *c)
{
if (!c->withdraw_bucket)
{
c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
init_list(&c->withdraw_bucket->prefixes);
}
return c->withdraw_bucket;
}
void
bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
{
rem_node(&b->send_node);
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
mb_free(b);
}
void
bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
{
rem_node(&b->send_node);
add_tail(&c->bucket_queue, &b->send_node);
}
void
bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
{
struct bgp_proto *p = (void *) c->c.proto;
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
log(L_ERR "%s: Attribute list too long", p->p.name);
while (!EMPTY_LIST(b->prefixes))
{
struct bgp_prefix *px = HEAD(b->prefixes);
log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
rem_node(&px->buck_node);
add_tail(&wb->prefixes, &px->buck_node);
}
}
/*
* Prefix hash table
*/
#define PXH_KEY(px) px->net, px->path_id, px->hash
#define PXH_NEXT(px) px->next
#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
#define PXH_PARAMS /8, *2, 2, 2, 8, 24
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
void
bgp_init_prefix_table(struct bgp_channel *c)
{
HASH_INIT(c->prefix_hash, c->pool, 8);
uint alen = net_addr_length[c->c.net_type];
c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
}
void
bgp_free_prefix_table(struct bgp_channel *c)
{
HASH_FREE(c->prefix_hash);
rfree(c->prefix_slab);
c->prefix_slab = NULL;
}
static struct bgp_prefix *
bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
{
u32 hash = net_hash(net) ^ u32_hash(path_id);
struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
if (px)
{
rem_node(&px->buck_node);
return px;
}
if (c->prefix_slab)
px = sl_alloc(c->prefix_slab);
else
px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
px->buck_node.next = NULL;
px->buck_node.prev = NULL;
px->hash = hash;
px->path_id = path_id;
net_copy(px->net, net);
HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
return px;
}
void
bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
{
rem_node(&px->buck_node);
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
if (c->prefix_slab)
sl_free(c->prefix_slab, px);
else
mb_free(px);
}
/*
* BGP protocol glue
*/
int
bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED)
{
rte *e = *new;
struct proto *SRC = e->attrs->src->proto;
struct bgp_proto *p = (struct bgp_proto *) P;
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
/* Reject our routes */
if (src == p)
return -1;
/* Accept non-BGP routes */
if (src == NULL)
return 0;
/* IBGP route reflection, RFC 4456 */
if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
{
/* Rejected unless configured as route reflector */
if (!p->rr_client && !src->rr_client)
return -1;
/* Generally, this should be handled when path is received, but we check it
also here as rr_cluster_id may be undefined or different in src. */
if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
return -1;
}
/* Handle well-known communities, RFC 1997 */
struct eattr *c;
if (p->cf->interpret_communities &&
(c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
{
struct adata *d = c->u.ptr;
/* Do not export anywhere */
if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
return -1;
/* Do not export outside of AS (or member-AS) */
if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
return -1;
/* Do not export outside of AS (or confederation) */
if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
return -1;
/* Do not export LLGR_STALE routes to LLGR-ignorant peers */
if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
return -1;
}
return 0;
}
static adata null_adata; /* adata of length 0 */
static ea_list *
bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
{
struct proto *SRC = e->attrs->src->proto;
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
ea_list *attrs = attrs0;
eattr *a;
adata *ad;
/* ORIGIN attribute - mandatory, attach if missing */
if (! bgp_find_attr(attrs0, BA_ORIGIN))
bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
/* AS_PATH attribute - mandatory */
a = bgp_find_attr(attrs0, BA_AS_PATH);
ad = a ? a->u.ptr : &null_adata;
/* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
ad = as_path_strip_confed(pool, ad);
/* AS_PATH attribute - keep or prepend ASN */
if (p->is_internal || p->rs_client)
{
/* IBGP or route server -> just ensure there is one */
if (!a)
bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
}
else if (p->is_interior)
{
/* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
}
else /* Regular EBGP (no RS, no confederation) */
{
/* Regular EBGP -> prepend ASN as regular sequence */
ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
/* MULTI_EXIT_DESC attribute - accept only if set in export filter */
a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
if (a && !(a->type & EAF_FRESH))
bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
}
/* NEXT_HOP attribute - delegated to AF-specific hook */
a = bgp_find_attr(attrs0, BA_NEXT_HOP);
bgp_update_next_hop(&s, a, &attrs);
/* LOCAL_PREF attribute - required for IBGP, attach if missing */
if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
/* IBGP route reflection, RFC 4456 */
if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
{
/* ORIGINATOR_ID attribute - attach if not already set */
if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
/* CLUSTER_LIST attribute - prepend cluster ID */
a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
ad = a ? a->u.ptr : NULL;
/* Prepend src cluster ID */
if (src->rr_cluster_id)
ad = int_set_prepend(pool, ad, src->rr_cluster_id);
/* Prepend dst cluster ID if src and dst clusters are different */
if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
ad = int_set_prepend(pool, ad, p->rr_cluster_id);
/* Should be at least one prepended cluster ID */
bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
}
/* AS4_* transition attributes, RFC 6793 4.2.2 */
if (! p->as4_session)
{
a = bgp_find_attr(attrs, BA_AS_PATH);
if (a && as_path_contains_as4(a->u.ptr))
{
bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
}
a = bgp_find_attr(attrs, BA_AGGREGATOR);
if (a && aggregator_contains_as4(a->u.ptr))
{
bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
}
}
/*
* Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
* conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
* should be checked in AF-specific hooks.
*/
/* Apply per-attribute export hooks for validatation and normalization */
return bgp_export_attrs(&s, attrs);
}
void
bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
{
struct bgp_proto *p = (void *) P;
struct bgp_channel *c = (void *) C;
struct bgp_bucket *buck;
struct bgp_prefix *px;
u32 path;
if (new)
{
struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2);
/* If attributes are invalid, we fail back to withdraw */
buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
path = new->attrs->src->global_id;
lp_flush(bgp_linpool2);
}
else
{
buck = bgp_get_withdraw_bucket(c);
path = old->attrs->src->global_id;
}
px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
add_tail(&buck->prefixes, &px->buck_node);
bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
static inline u32
bgp_get_neighbor(rte *r)
{
eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
u32 as;
if (e && as_path_get_first_regular(e->u.ptr, &as))
return as;
/* If AS_PATH is not defined, we treat rte as locally originated */
struct bgp_proto *p = (void *) r->attrs->src->proto;
return p->cf->confederation ?: p->local_as;
}
static inline int
rte_resolvable(rte *rt)
{
return rt->attrs->dest == RTD_UNICAST;
}
static inline int
rte_stale(rte *r)
{
if (r->u.bgp.stale < 0)
{
/* If staleness is unknown, compute and cache it */
eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
}
return r->u.bgp.stale;
}
int
bgp_rte_better(rte *new, rte *old)
{
struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
eattr *x, *y;
u32 n, o;
/* Skip suppressed routes (see bgp_rte_recalculate()) */
n = new->u.bgp.suppressed;
o = old->u.bgp.suppressed;
if (n > o)
return 0;
if (n < o)
return 1;
/* RFC 4271 9.1.2.1. Route resolvability test */
n = rte_resolvable(new);
o = rte_resolvable(old);
if (n > o)
return 1;
if (n < o)
return 0;
/* LLGR draft - depreference stale routes */
n = rte_stale(new);
o = rte_stale(old);
if (n > o)
return 0;
if (n < o)
return 1;
/* Start with local preferences */
x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
n = x ? x->u.data : new_bgp->cf->default_local_pref;
o = y ? y->u.data : old_bgp->cf->default_local_pref;
if (n > o)
return 1;
if (n < o)
return 0;
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
{
x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
if (n < o)
return 1;
if (n > o)
return 0;
}
/* RFC 4271 9.1.2.2. b) Use origins */
x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
n = x ? x->u.data : ORIGIN_INCOMPLETE;
o = y ? y->u.data : ORIGIN_INCOMPLETE;
if (n < o)
return 1;
if (n > o)
return 0;
/* RFC 4271 9.1.2.2. c) Compare MED's */
/* Proper RFC 4271 path selection cannot be interpreted as finding
* the best path in some ordering. It is implemented partially in
* bgp_rte_recalculate() when deterministic_med option is
* active. Without that option, the behavior is just an
* approximation, which in specific situations may lead to
* persistent routing loops, because it is nondeterministic - it
* depends on the order in which routes appeared. But it is also the
* same behavior as used by default in Cisco routers, so it is
* probably not a big issue.
*/
if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
(bgp_get_neighbor(new) == bgp_get_neighbor(old)))
{
x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
n = x ? x->u.data : new_bgp->cf->default_med;
o = y ? y->u.data : old_bgp->cf->default_med;
if (n < o)
return 1;
if (n > o)
return 0;
}
/* RFC 4271 9.1.2.2. d) Prefer external peers */
if (new_bgp->is_interior > old_bgp->is_interior)
return 0;
if (new_bgp->is_interior < old_bgp->is_interior)
return 1;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
if (n < o)
return 1;
if (n > o)
return 0;
/* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
/* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
n = x ? x->u.data : new_bgp->remote_id;
o = y ? y->u.data : old_bgp->remote_id;
/* RFC 5004 - prefer older routes */
/* (if both are external and from different peer) */
if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
!new_bgp->is_internal && n != o)
return 0;
/* rest of RFC 4271 9.1.2.2. f) */
if (n < o)
return 1;
if (n > o)
return 0;
/* RFC 4456 9. b) Compare cluster list lengths */
x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
n = x ? int_set_get_size(x->u.ptr) : 0;
o = y ? int_set_get_size(y->u.ptr) : 0;
if (n < o)
return 1;
if (n > o)
return 0;
/* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
}
int
bgp_rte_mergable(rte *pri, rte *sec)
{
struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
eattr *x, *y;
u32 p, s;
/* Skip suppressed routes (see bgp_rte_recalculate()) */
if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
return 0;
/* RFC 4271 9.1.2.1. Route resolvability test */
if (!rte_resolvable(sec))
return 0;
/* LLGR draft - depreference stale routes */
if (rte_stale(pri) != rte_stale(sec))
return 0;
/* Start with local preferences */
x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
p = x ? x->u.data : pri_bgp->cf->default_local_pref;
s = y ? y->u.data : sec_bgp->cf->default_local_pref;
if (p != s)
return 0;
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
{
x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
if (p != s)
return 0;
// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
// return 0;
}
/* RFC 4271 9.1.2.2. b) Use origins */
x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
p = x ? x->u.data : ORIGIN_INCOMPLETE;
s = y ? y->u.data : ORIGIN_INCOMPLETE;
if (p != s)
return 0;
/* RFC 4271 9.1.2.2. c) Compare MED's */
if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
(bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
{
x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
p = x ? x->u.data : pri_bgp->cf->default_med;
s = y ? y->u.data : sec_bgp->cf->default_med;
if (p != s)
return 0;
}
/* RFC 4271 9.1.2.2. d) Prefer external peers */
if (pri_bgp->is_interior != sec_bgp->is_interior)
return 0;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
if (p != s)
return 0;
/* Remaining criteria are ignored */
return 1;
}
static inline int
same_group(rte *r, u32 lpref, u32 lasn)
{
return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
}
static inline int
use_deterministic_med(rte *r)
{
struct proto *P = r->attrs->src->proto;
return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
}
int
bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
{
rte *r, *s;
rte *key = new ? new : old;
u32 lpref = key->pref;
u32 lasn = bgp_get_neighbor(key);
int old_is_group_best = 0;
/*
* Proper RFC 4271 path selection is a bit complicated, it cannot be
* implemented just by rte_better(), because it is not a linear
* ordering. But it can be splitted to two levels, where the lower
* level chooses the best routes in each group of routes from the
* same neighboring AS and higher level chooses the best route (with
* a slightly different ordering) between the best-in-group routes.
*
* When deterministic_med is disabled, we just ignore this issue and
* choose the best route by bgp_rte_better() alone. If enabled, the
* lower level of the route selection is done here (for the group
* to which the changed route belongs), all routes in group are
* marked as suppressed, just chosen best-in-group is not.
*
* Global best route selection then implements higher level by
* choosing between non-suppressed routes (as they are always
* preferred over suppressed routes). Routes from BGP protocols
* that do not set deterministic_med are just never suppressed. As
* they do not participate in the lower level selection, it is OK
* that this fn is not called for them.
*
* The idea is simple, the implementation is more problematic,
* mostly because of optimizations in rte_recalculate() that
* avoids full recalculation in most cases.
*
* We can assume that at least one of new, old is non-NULL and both
* are from the same protocol with enabled deterministic_med. We
* group routes by both neighbor AS (lasn) and preference (lpref),
* because bgp_rte_better() does not handle preference itself.
*/
/* If new and old are from different groups, we just process that
as two independent events */
if (new && old && !same_group(old, lpref, lasn))
{
int i1, i2;
i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
return i1 || i2;
}
/*
* We could find the best-in-group and then make some shortcuts like
* in rte_recalculate, but as we would have to walk through all
* net->routes just to find it, it is probably not worth. So we
* just have two simpler fast cases that use just the old route.
* We also set suppressed flag to avoid using it in bgp_rte_better().
*/
if (new)
new->u.bgp.suppressed = 1;
if (old)
{
old_is_group_best = !old->u.bgp.suppressed;
old->u.bgp.suppressed = 1;
int new_is_better = new && bgp_rte_better(new, old);
/* The first case - replace not best with worse (or remove not best) */
if (!old_is_group_best && !new_is_better)
return 0;
/* The second case - replace the best with better */
if (old_is_group_best && new_is_better)
{
/* new is best-in-group, the see discussion below - this is
a special variant of NBG && OBG. From OBG we can deduce
that same_group(old_best) iff (old == old_best) */
new->u.bgp.suppressed = 0;
return (old == old_best);
}
}
/* The default case - find a new best-in-group route */
r = new; /* new may not be in the list */
for (s=net->routes; rte_is_valid(s); s=s->next)
if (use_deterministic_med(s) && same_group(s, lpref, lasn))
{
s->u.bgp.suppressed = 1;
if (!r || bgp_rte_better(s, r))
r = s;
}
/* Simple case - the last route in group disappears */
if (!r)
return 0;
/* Found best-in-group */
r->u.bgp.suppressed = 0;
/*
* There are generally two reasons why we have to force
* recalculation (return 1): First, the new route may be wrongfully
* chosen to be the best in the first case check in
* rte_recalculate(), this may happen only if old_best is from the
* same group. Second, another (different than new route)
* best-in-group is chosen and that may be the proper best (although
* rte_recalculate() without ignore that possibility).
*
* There are three possible cases according to whether the old route
* was the best in group (OBG, stored in old_is_group_best) and
* whether the new route is the best in group (NBG, tested by r == new).
* These cases work even if old or new is NULL.
*
* NBG -> new is a possible candidate for the best route, so we just
* check for the first reason using same_group().
*
* !NBG && OBG -> Second reason applies, return 1
*
* !NBG && !OBG -> Best in group does not change, old != old_best,
* rte_better(new, old_best) is false and therefore
* the first reason does not apply, return 0
*/
if (r == new)
return old_best && same_group(old_best, lpref, lasn);
else
return old_is_group_best;
}
struct rte *
bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
{
eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
struct adata *ad = a ? a->u.ptr : NULL;
uint flags = a ? a->flags : BAF_PARTIAL;
if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
return NULL;
if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
return r;
r = rte_cow_rta(r, pool);
bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
r->u.bgp.stale = 1;
return r;
}
/*
* Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
*/
static void
bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
{
eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
/* First, unset AS4_* attributes */
if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
/* Handle AGGREGATOR attribute */
if (a2 && a4)
{
u32 a2_asn = get_u32(a2->u.ptr->data);
/* If routes were aggregated by an old router, then AS4_PATH and
AS4_AGGREGATOR are invalid. In that case we give up. */
if (a2_asn != AS_TRANS)
return;
/* Use AS4_AGGREGATOR instead of AGGREGATOR */
a2->u.ptr = a4->u.ptr;
}
/* Handle AS_PATH attribute */
if (p2 && p4)
{
/* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
int p2_len = as_path_getlen(p2->u.ptr);
int p4_len = as_path_getlen(p4->u.ptr);
/* AS_PATH is too short, give up */
if (p2_len < p4_len)
return;
/* Merge AS_PATH and AS4_PATH */
as_path_cut(p2->u.ptr, p2_len - p4_len);
p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr);
}
}
int
bgp_get_attr(eattr *a, byte *buf, int buflen)
{
uint i = EA_ID(a->id);
const struct bgp_attr_desc *d;
int len;
if (bgp_attr_known(i))
{
d = &bgp_attr_table[i];
len = bsprintf(buf, "%s", d->name);
buf += len;
if (d->format)
{
*buf++ = ':';
*buf++ = ' ';
d->format(a, buf, buflen - len - 2);
return GA_FULL;
}
return GA_NAME;
}
bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
return GA_NAME;
}
void
bgp_get_route_info(rte *e, byte *buf)
{
eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
u32 origas;
buf += bsprintf(buf, " (%d", e->pref);
if (e->u.bgp.suppressed)
buf += bsprintf(buf, "-");
if (rte_stale(e))
buf += bsprintf(buf, "s");
if (e->attrs->hostentry)
{
if (!rte_resolvable(e))
buf += bsprintf(buf, "/-");
else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
buf += bsprintf(buf, "/?");
else
buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
}
buf += bsprintf(buf, ") [");
if (p && as_path_get_last(p->u.ptr, &origas))
buf += bsprintf(buf, "AS%u", origas);
if (o)
buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
strcpy(buf, "]");
}