bird/proto/bgp/attrs.c
Ondrej Zajicek 11b32d9117 Major changes to BGP
Fixes two race conditions causing crash of Bird, several unhandled
cases during BGP initialization, and some other bugs. Also changes
handling of startup delay to be more useful and implement
reporting of last error in 'show protocols' command.
2008-12-19 01:34:39 +01:00

1363 lines
34 KiB
C

/*
* BIRD -- BGP Attributes
*
* (c) 2000 Martin Mares <mj@ucw.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#undef LOCAL_DEBUG
#include <stdlib.h>
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
#include "nest/attrs.h"
#include "conf/conf.h"
#include "lib/resource.h"
#include "lib/string.h"
#include "lib/unaligned.h"
#include "bgp.h"
static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH
#ifndef IPV6
,BA_NEXT_HOP
#endif
};
struct attr_desc {
char *name;
int expected_length;
int expected_flags;
int type;
int allow_in_ebgp;
int (*validate)(struct bgp_proto *p, byte *attr, int len);
void (*format)(eattr *ea, byte *buf, int buflen);
};
static int
bgp_check_origin(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
{
if (len > 2)
return 6;
return 0;
}
static void
bgp_format_origin(eattr *a, byte *buf, int buflen)
{
static char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
bsprintf(buf, bgp_origin_names[a->u.data]);
}
static int
bgp_check_path(byte *a, int len, int bs, int errcode)
{
while (len)
{
DBG("Path segment %02x %02x\n", a[0], a[1]);
if (len < 2 ||
(a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE) ||
bs * a[1] + 2 > len)
return errcode;
len -= bs * a[1] + 2;
a += bs * a[1] + 2;
}
return 0;
}
static int
bgp_check_as_path(struct bgp_proto *p, byte *a, int len)
{
return bgp_check_path(a, len, p->as4_session ? 4 : 2, 11);
}
static int
bgp_check_as4_path(struct bgp_proto *p, byte *a, int len)
{
if (bgp_as4_support && (! p->as4_session))
return bgp_check_path(a, len, 4, 9);
else
return 0;
}
static int
bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len)
{
#ifdef IPV6
return -1;
#else
ip_addr addr;
memcpy(&addr, a, len);
ipa_ntoh(addr);
if (ipa_classify(addr) & IADDR_HOST)
return 0;
else
return 8;
#endif
}
static int
bgp_check_aggregator(struct bgp_proto *p, byte *a UNUSED, int len)
{
int exp_len = p->as4_session ? 8 : 6;
return (len == exp_len) ? 0 : 5;
}
static int
bgp_check_cluster_list(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
{
return ((len % 4) == 0) ? 0 : 5;
}
static void
bgp_format_cluster_list(eattr *a, byte *buf, int buflen UNUSED)
{
int_set_format(a->u.ptr, 0, buf, buflen);
}
static int
bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
{
#ifdef IPV6
p->mp_reach_start = a;
p->mp_reach_len = len;
#endif
return -1;
}
static int
bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
{
#ifdef IPV6
p->mp_unreach_start = a;
p->mp_unreach_len = len;
#endif
return -1;
}
static struct attr_desc bgp_attr_table[] = {
{ NULL, -1, 0, 0, 0, /* Undefined */
NULL, NULL },
{ "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */
bgp_check_origin, bgp_format_origin },
{ "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */
bgp_check_as_path, NULL },
{ "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */
bgp_check_next_hop, NULL },
{ "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */
NULL, NULL },
{ "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 0, /* BA_LOCAL_PREF */
NULL, NULL },
{ "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */
NULL, NULL },
{ "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */
bgp_check_aggregator, NULL },
{ "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */
NULL, NULL },
{ "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_ROUTER_ID, 0, /* BA_ORIGINATOR_ID */
NULL, NULL },
{ "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */
bgp_check_cluster_list, bgp_format_cluster_list },
{ NULL, }, /* BA_DPA */
{ NULL, }, /* BA_ADVERTISER */
{ NULL, }, /* BA_RCID_PATH */
{ "mp_reach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_REACH_NLRI */
bgp_check_reach_nlri, NULL },
{ "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */
bgp_check_unreach_nlri, NULL },
{ NULL, }, /* BA_EXTENDED_COMM */
{ "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
bgp_check_as4_path, NULL },
{ "as4_aggregator", 8, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
NULL, NULL }
};
/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH because
* EAF_TYPE_AS_PATH is supposed to have different format (2 or 4 B for each ASN)
* depending on bgp_as4_support variable.
*/
#define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name)
static inline struct adata *
bgp_alloc_adata(struct linpool *pool, unsigned len)
{
struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
ad->length = len;
return ad;
}
static void
bgp_set_attr(eattr *e, unsigned attr, uintptr_t val)
{
ASSERT(ATTR_KNOWN(attr));
e->id = EA_CODE(EAP_BGP, attr);
e->type = bgp_attr_table[attr].type;
e->flags = bgp_attr_table[attr].expected_flags;
if (e->type & EAF_EMBEDDED)
e->u.data = val;
else
e->u.ptr = (struct adata *) val;
}
static byte *
bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len)
{
struct adata *ad = bgp_alloc_adata(pool, len);
bgp_set_attr(e, attr, (uintptr_t) ad);
return ad->data;
}
void
bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val)
{
ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
a->next = *to;
*to = a;
a->flags = EALF_SORTED;
a->count = 1;
bgp_set_attr(a->attrs, attr, val);
}
byte *
bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len)
{
struct adata *ad = bgp_alloc_adata(pool, len);
bgp_attach_attr(to, pool, attr, (uintptr_t) ad);
return ad->data;
}
static int
bgp_encode_attr_hdr(byte *dst, unsigned int flags, unsigned code, int len)
{
int wlen;
DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags);
if (len < 256)
{
*dst++ = flags;
*dst++ = code;
*dst++ = len;
wlen = 3;
}
else
{
*dst++ = flags | BAF_EXT_LEN;
*dst++ = code;
put_u16(dst, len);
wlen = 4;
}
return wlen;
}
static void
aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used)
{
byte *src = aggr->data;
*new_used = 0;
u32 as = get_u32(src);
if (as > 0xFFFF)
{
as = AS_TRANS;
*new_used = 1;
}
put_u16(dst, as);
/* Copy IPv4 address */
memcpy(dst + 2, src + 4, 4);
}
static void
aggregator_convert_to_new(struct adata *aggr, byte *dst)
{
byte *src = aggr->data;
u32 as = get_u16(src);
put_u32(dst, as);
/* Copy IPv4 address */
memcpy(dst + 4, src + 2, 4);
}
static int
bgp_get_attr_len(eattr *a)
{
int len;
if (ATTR_KNOWN(EA_ID(a->id)))
{
int code = EA_ID(a->id);
struct attr_desc *desc = &bgp_attr_table[code];
len = desc->expected_length;
if (len < 0)
{
ASSERT(!(a->type & EAF_EMBEDDED));
len = a->u.ptr->length;
}
}
else
{
ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
len = a->u.ptr->length;
}
return len;
}
#define ADVANCE(w, r, l) do { r -= l; w += l; } while (0)
/**
* bgp_encode_attrs - encode BGP attributes
* @p: BGP instance
* @w: buffer
* @attrs: a list of extended attributes
* @remains: remaining space in the buffer
*
* The bgp_encode_attrs() function takes a list of extended attributes
* and converts it to its BGP representation (a part of an Update message).
*
* Result: Length of the attribute block generated.
*/
unsigned int
bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
{
unsigned int i, code, flags;
byte *start = w;
int len, rv;
for(i=0; i<attrs->count; i++)
{
eattr *a = &attrs->attrs[i];
ASSERT(EA_PROTO(a->id) == EAP_BGP);
code = EA_ID(a->id);
#ifdef IPV6
/* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */
if (code == BA_NEXT_HOP)
continue;
#endif
/* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
* we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH
* as optional AS4_PATH attribute.
*/
if ((code == BA_AS_PATH) && bgp_as4_support && (! p->as4_session))
{
len = a->u.ptr->length;
if (remains < (len + 4))
goto err_no_buffer;
/* Using temporary buffer because don't know a length of created attr
* and therefore a length of a header. Perhaps i should better always
* use BAF_EXT_LEN. */
byte buf[len];
int new_used;
int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used);
DBG("BGP: Encoding old AS_PATH\n");
rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl);
ADVANCE(w, remains, rv);
memcpy(w, buf, nl);
ADVANCE(w, remains, nl);
if (! new_used)
continue;
if (remains < (len + 4))
goto err_no_buffer;
/* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments
* here but we don't support confederations and such paths we already
* discarded in bgp_check_as_path().
*/
DBG("BGP: Encoding AS4_PATH\n");
rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len);
ADVANCE(w, remains, rv);
memcpy(w, a->u.ptr->data, len);
ADVANCE(w, remains, len);
continue;
}
/* The same issue with AGGREGATOR attribute */
if ((code == BA_AGGREGATOR) && bgp_as4_support && (! p->as4_session))
{
int new_used;
len = 6;
if (remains < (len + 3))
goto err_no_buffer;
rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len);
ADVANCE(w, remains, rv);
aggregator_convert_to_old(a->u.ptr, w, &new_used);
ADVANCE(w, remains, len);
if (! new_used)
continue;
len = 8;
if (remains < (len + 3))
goto err_no_buffer;
rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len);
ADVANCE(w, remains, rv);
memcpy(w, a->u.ptr->data, len);
ADVANCE(w, remains, len);
continue;
}
/* Standard path continues here ... */
flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
len = bgp_get_attr_len(a);
if (remains < len + 4)
goto err_no_buffer;
rv = bgp_encode_attr_hdr(w, flags, code, len);
ADVANCE(w, remains, rv);
switch (a->type & EAF_TYPE_MASK)
{
case EAF_TYPE_INT:
case EAF_TYPE_ROUTER_ID:
if (len == 4)
put_u32(w, a->u.data);
else
*w = a->u.data;
break;
case EAF_TYPE_IP_ADDRESS:
{
ip_addr ip = *(ip_addr *)a->u.ptr->data;
ipa_hton(ip);
memcpy(w, &ip, len);
break;
}
case EAF_TYPE_INT_SET:
{
u32 *z = (u32 *)a->u.ptr->data;
int i;
for(i=0; i<len; i+=4)
put_u32(w+i, *z++);
break;
}
case EAF_TYPE_OPAQUE:
case EAF_TYPE_AS_PATH:
memcpy(w, a->u.ptr->data, len);
break;
default:
bug("bgp_encode_attrs: unknown attribute type %02x", a->type);
}
ADVANCE(w, remains, len);
}
return w - start;
err_no_buffer:
log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes");
return w - start;
}
static void
bgp_init_prefix(struct fib_node *N)
{
struct bgp_prefix *p = (struct bgp_prefix *) N;
p->bucket_node.next = NULL;
}
static int
bgp_compare_u32(const u32 *x, const u32 *y)
{
return (*x < *y) ? -1 : (*x > *y) ? 1 : 0;
}
static void
bgp_normalize_set(u32 *dest, u32 *src, unsigned cnt)
{
memcpy(dest, src, sizeof(u32) * cnt);
qsort(dest, cnt, sizeof(u32), (int(*)(const void *, const void *)) bgp_compare_u32);
}
static void
bgp_rehash_buckets(struct bgp_proto *p)
{
struct bgp_bucket **old = p->bucket_hash;
struct bgp_bucket **new;
unsigned oldn = p->hash_size;
unsigned i, e, mask;
struct bgp_bucket *b;
p->hash_size = p->hash_limit;
DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size);
p->hash_limit *= 4;
if (p->hash_limit >= 65536)
p->hash_limit = ~0;
new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
mask = p->hash_size - 1;
for (i=0; i<oldn; i++)
while (b = old[i])
{
old[i] = b->hash_next;
e = b->hash & mask;
b->hash_next = new[e];
if (b->hash_next)
b->hash_next->hash_prev = b;
b->hash_prev = NULL;
new[e] = b;
}
mb_free(old);
}
static struct bgp_bucket *
bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash)
{
struct bgp_bucket *b;
unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
unsigned size = sizeof(struct bgp_bucket) + ea_size;
unsigned i;
byte *dest;
unsigned index = hash & (p->hash_size - 1);
/* Gather total size of non-inline attributes */
for (i=0; i<new->count; i++)
{
eattr *a = &new->attrs[i];
if (!(a->type & EAF_EMBEDDED))
size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
}
/* Create the bucket and hash it */
b = mb_alloc(p->p.pool, size);
b->hash_next = p->bucket_hash[index];
if (b->hash_next)
b->hash_next->hash_prev = b;
p->bucket_hash[index] = b;
b->hash_prev = NULL;
b->hash = hash;
add_tail(&p->bucket_queue, &b->send_node);
init_list(&b->prefixes);
memcpy(b->eattrs, new, ea_size);
dest = ((byte *)b->eattrs) + ea_size_aligned;
/* Copy values of non-inline attributes */
for (i=0; i<new->count; i++)
{
eattr *a = &b->eattrs->attrs[i];
if (!(a->type & EAF_EMBEDDED))
{
struct adata *oa = a->u.ptr;
struct adata *na = (struct adata *) dest;
memcpy(na, oa, sizeof(struct adata) + oa->length);
a->u.ptr = na;
dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
}
}
/* If needed, rehash */
p->hash_count++;
if (p->hash_count > p->hash_limit)
bgp_rehash_buckets(p);
return b;
}
static int
bgp_export_check(struct bgp_proto *p, ea_list *new)
{
eattr *a;
struct adata *d;
/* Check if next hop is valid */
a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP));
if (!a || ipa_equal(p->next_hop, *(ip_addr *)a->u.ptr))
{
DBG("\tInvalid NEXT_HOP\n");
return 0;
}
/* Check if we aren't forbidden to export the route by communities */
a = ea_find(new, EA_CODE(EAP_BGP, BA_COMMUNITY));
if (a)
{
d = a->u.ptr;
if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
{
DBG("\tNO_ADVERTISE\n");
return 0;
}
if (!p->is_internal &&
(int_set_contains(d, BGP_COMM_NO_EXPORT) ||
int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED)))
{
DBG("\tNO_EXPORT\n");
return 0;
}
}
return 1;
}
static struct bgp_bucket *
bgp_get_bucket(struct bgp_proto *p, ea_list *attrs, int originate)
{
ea_list *new;
unsigned i, cnt, hash, code;
eattr *a, *d;
u32 seen = 0;
struct bgp_bucket *b;
/* Merge the attribute list */
new = alloca(ea_scan(attrs));
ea_merge(attrs, new);
ea_sort(new);
/* Normalize attributes */
d = new->attrs;
cnt = new->count;
new->count = 0;
for(i=0; i<cnt; i++)
{
a = &new->attrs[i];
#ifdef LOCAL_DEBUG
{
byte buf[EA_FORMAT_BUF_SIZE];
ea_format(a, buf);
DBG("\t%s\n", buf);
}
#endif
if (EA_PROTO(a->id) != EAP_BGP)
continue;
code = EA_ID(a->id);
if (ATTR_KNOWN(code))
{
if (!bgp_attr_table[code].allow_in_ebgp && !p->is_internal)
continue;
/* The flags might have been zero if the attr was added by filters */
a->flags = (a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags;
if (code < 32)
seen |= 1 << code;
}
else
{
/* Don't re-export unknown non-transitive attributes */
if (!(a->flags & BAF_TRANSITIVE))
continue;
}
*d = *a;
if ((d->type & EAF_ORIGINATED) && !originate && (d->flags & BAF_TRANSITIVE) && (d->flags & BAF_OPTIONAL))
d->flags |= BAF_PARTIAL;
switch (d->type & EAF_TYPE_MASK)
{
case EAF_TYPE_INT_SET:
{
struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
z->length = d->u.ptr->length;
bgp_normalize_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / 4);
d->u.ptr = z;
break;
}
default: ;
}
d++;
new->count++;
}
/* Hash */
hash = ea_hash(new);
for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next)
if (b->hash == hash && ea_same(b->eattrs, new))
{
DBG("Found bucket.\n");
return b;
}
/* Ensure that there are all mandatory attributes */
for(i=0; i<ARRAY_SIZE(bgp_mandatory_attrs); i++)
if (!(seen & (1 << bgp_mandatory_attrs[i])))
{
log(L_ERR "%s: Mandatory attribute %s missing", p->p.name, bgp_attr_table[bgp_mandatory_attrs[i]].name);
return NULL;
}
if (!bgp_export_check(p, new))
return NULL;
/* Create new bucket */
DBG("Creating bucket.\n");
return bgp_new_bucket(p, new, hash);
}
void
bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck)
{
if (buck->hash_next)
buck->hash_next->hash_prev = buck->hash_prev;
if (buck->hash_prev)
buck->hash_prev->hash_next = buck->hash_next;
else
p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next;
mb_free(buck);
}
void
bgp_rt_notify(struct proto *P, net *n, rte *new, rte *old UNUSED, ea_list *attrs)
{
struct bgp_proto *p = (struct bgp_proto *) P;
struct bgp_bucket *buck;
struct bgp_prefix *px;
DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down");
if (new)
{
buck = bgp_get_bucket(p, attrs, new->attrs->source != RTS_BGP);
if (!buck) /* Inconsistent attribute list */
return;
}
else
{
if (!(buck = p->withdraw_bucket))
{
buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket));
init_list(&buck->prefixes);
}
}
px = fib_get(&p->prefix_fib, &n->n.prefix, n->n.pxlen);
if (px->bucket_node.next)
{
DBG("\tRemoving old entry.\n");
rem_node(&px->bucket_node);
}
add_tail(&buck->prefixes, &px->bucket_node);
bgp_schedule_packet(p->conn, PKT_UPDATE);
}
static int
bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
{
ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr));
rta *rta = e->attrs;
byte *z;
ea->next = *attrs;
*attrs = ea;
ea->flags = EALF_SORTED;
ea->count = 4;
bgp_set_attr(ea->attrs, BA_ORIGIN,
((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
if (p->is_internal)
bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0);
else
{
z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 6 : 4);
z[0] = AS_PATH_SEQUENCE;
z[1] = 1; /* 1 AS */
if (bgp_as4_support)
put_u32(z+2, p->local_as);
else
put_u16(z+2, p->local_as);
}
z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr));
if (p->cf->next_hop_self ||
!p->is_internal ||
rta->dest != RTD_ROUTER)
{
if (ipa_nonzero(p->cf->source_addr))
*(ip_addr *)z = p->cf->source_addr;
else
*(ip_addr *)z = p->local_addr;
}
else
*(ip_addr *)z = e->attrs->gw;
bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, 0);
return 0; /* Leave decision to the filters */
}
static inline int
bgp_as_path_loopy(struct bgp_proto *p, rta *a)
{
eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
return (e && as_path_is_member(e->u.ptr, p->local_as));
}
static inline int
bgp_originator_id_loopy(struct bgp_proto *p, rta *a)
{
eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
return (e && (e->u.data == p->local_id));
}
static inline int
bgp_cluster_list_loopy(struct bgp_proto *p, rta *a)
{
eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id));
}
static inline void
bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as)
{
eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as));
}
static inline void
bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
{
eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_add(pool, a ? a->u.ptr : NULL, cid));
}
static int
bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr)
{
eattr *a;
if (!p->is_internal && !p->rs_client)
{
bgp_path_prepend(e, attrs, pool, p->local_as);
/* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
* propagated to other neighboring ASes.
* Perhaps it would be better to undefine it.
*/
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
if (a)
bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
}
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
if (a && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface)))
{
/* Leave the original next hop attribute, will check later where does it point */
}
else
{
/* Need to create new one */
bgp_attach_attr_ip(attrs, pool, BA_NEXT_HOP, p->local_addr);
}
if (rr)
{
/* Handling route reflection, RFC 4456 */
struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto;
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
if (!a)
bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id);
/* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id);
/* Two RR clients with different cluster ID, hmmm */
if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id))
bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id);
}
return 0; /* Leave decision to the filters */
}
int
bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool)
{
rte *e = *new;
struct bgp_proto *p = (struct bgp_proto *) P;
struct bgp_proto *new_bgp = (e->attrs->proto->proto == &proto_bgp) ? (struct bgp_proto *) e->attrs->proto : NULL;
if (p == new_bgp) /* Poison reverse updates */
return -1;
if (new_bgp)
{
/* We should check here for cluster list loop, because the receiving BGP instance
might have different cluster ID */
if (bgp_cluster_list_loopy(p, e->attrs))
return -1;
if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal)
{
/* Redistribution of internal routes with IBGP */
if (p->rr_client || new_bgp->rr_client)
/* Route reflection, RFC 4456 */
return bgp_update_attrs(p, e, attrs, pool, 1);
else
return -1;
}
else
return bgp_update_attrs(p, e, attrs, pool, 0);
}
else
return bgp_create_attrs(p, e, attrs, pool);
}
static inline u32
bgp_get_neighbor(rte *r)
{
eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
u32 as;
if (e && as_path_get_last(e->u.ptr, &as))
return as;
else
return ((struct bgp_proto *) r->attrs->proto)->remote_as;
}
int
bgp_rte_better(rte *new, rte *old)
{
struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->proto;
struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->proto;
eattr *x, *y;
u32 n, o;
/* Start with local preferences */
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
n = x ? x->u.data : new_bgp->cf->default_local_pref;
o = y ? y->u.data : old_bgp->cf->default_local_pref;
if (n > o)
return 1;
if (n < o)
return 0;
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
{
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
if (n < o)
return 1;
if (n > o)
return 0;
}
/* RFC 4271 9.1.2.2. b) Use origins */
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
n = x ? x->u.data : ORIGIN_INCOMPLETE;
o = y ? y->u.data : ORIGIN_INCOMPLETE;
if (n < o)
return 1;
if (n > o)
return 0;
/* RFC 4271 9.1.2.2. c) Compare MED's */
if (bgp_get_neighbor(new) == bgp_get_neighbor(old))
{
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
n = x ? x->u.data : new_bgp->cf->default_med;
o = y ? y->u.data : old_bgp->cf->default_med;
if (n < o)
return 1;
if (n > o)
return 0;
}
/* RFC 4271 9.1.2.2. d) Prefer external peers */
if (new_bgp->is_internal > old_bgp->is_internal)
return 0;
if (new_bgp->is_internal < old_bgp->is_internal)
return 1;
/* Skipping RFC 4271 9.1.2.2. e) */
/* We don't have interior distances */
/* RFC 4456 9. b) Compare cluster list lengths */
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
n = x ? int_set_get_size(x->u.ptr) : 0;
o = y ? int_set_get_size(y->u.ptr) : 0;
if (n < o)
return 1;
if (n > o)
return 0;
/* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
/* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
n = x ? x->u.data : new_bgp->remote_id;
o = y ? y->u.data : old_bgp->remote_id;
if (n < o)
return 1;
if (n > o)
return 0;
/* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
}
static struct adata *
bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
{
struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8);
newa->length = 8;
aggregator_convert_to_new(old, newa->data);
return newa;
}
/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
* and append path old4 (in 4B format).
*/
static struct adata *
bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool)
{
byte buf[old2->length * 2];
int ol = as_path_convert_to_new(old2, buf, req_as);
int nl = ol + (old4 ? old4->length : 0);
struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl);
newa->length = nl;
memcpy(newa->data, buf, ol);
if (old4) memcpy(newa->data + ol, old4->data, old4->length);
return newa;
}
/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
static void
bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool)
{
eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH));
eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR));
eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR));
if (a2)
{
u32 a2_as = get_u16(a2->u.ptr->data);
if (a4)
{
if (a2_as != AS_TRANS)
{
/* Routes were aggregated by old router and therefore AS4_PATH
* and AS4_AGGREGATOR is invalid
*
* Convert AS_PATH and AGGREGATOR to 4B format and finish.
*/
a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
return;
}
else
{
/* Common case, use AS4_AGGREGATOR attribute */
a2->u.ptr = a4->u.ptr;
}
}
else
{
/* Common case, use old AGGREGATOR attribute */
a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
if (a2_as == AS_TRANS)
log(L_WARN "BGP: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing");
}
}
else
if (a4)
log(L_WARN "BGP: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing");
int p2_len = as_path_getlen(p2->u.ptr);
int p4_len = p4 ? as_path_getlen(p4->u.ptr) : AS_PATH_MAXLEN;
if (p2_len < p4_len)
p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
else
p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool);
}
static void
bgp_remove_as4_attrs(struct bgp_proto *p, rta *a)
{
unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH);
unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR);
ea_list **el = &(a->eattrs);
/* We know that ea_lists constructed in bgp_decode_attrs have one attribute per ea_list struct */
while (*el != NULL)
{
unsigned fid = (*el)->attrs[0].id;
if ((fid == id1) || (fid == id2))
{
*el = (*el)->next;
if (p->as4_session)
log(L_WARN "BGP: Unexpected AS4_* attributes received");
}
else
el = &((*el)->next);
}
}
/**
* bgp_decode_attrs - check and decode BGP attributes
* @conn: connection
* @attr: start of attribute block
* @len: length of attribute block
* @pool: linear pool to make all the allocations in
* @mandatory: 1 iff presence of mandatory attributes has to be checked
*
* This function takes a BGP attribute block (a part of an Update message), checks
* its consistency and converts it to a list of BIRD route attributes represented
* by a &rta.
*/
struct rta *
bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct linpool *pool, int mandatory)
{
struct bgp_proto *bgp = conn->bgp;
rta *a = lp_alloc(pool, sizeof(struct rta));
unsigned int flags, code, l, i, type;
int errcode;
byte *z, *attr_start;
byte seen[256/8];
eattr *e;
ea_list *ea;
struct adata *ad;
a->proto = &bgp->p;
a->source = RTS_BGP;
a->scope = SCOPE_UNIVERSE;
a->cast = RTC_UNICAST;
a->dest = RTD_ROUTER;
a->flags = 0;
a->aflags = 0;
a->from = bgp->cf->remote_ip;
a->eattrs = NULL;
/* Parse the attributes */
bzero(seen, sizeof(seen));
DBG("BGP: Parsing attributes\n");
while (len)
{
if (len < 2)
goto malformed;
attr_start = attr;
flags = *attr++;
code = *attr++;
len -= 2;
if (flags & BAF_EXT_LEN)
{
if (len < 2)
goto malformed;
l = get_u16(attr);
attr += 2;
len -= 2;
}
else
{
if (len < 1)
goto malformed;
l = *attr++;
len--;
}
if (l > len)
goto malformed;
len -= l;
z = attr;
attr += l;
DBG("Attr %02x %02x %d\n", code, flags, l);
if (seen[code/8] & (1 << (code%8)))
goto malformed;
if (ATTR_KNOWN(code))
{
struct attr_desc *desc = &bgp_attr_table[code];
if (desc->expected_length >= 0 && desc->expected_length != (int) l)
{ errcode = 5; goto err; }
if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
{ errcode = 4; goto err; }
if (!desc->allow_in_ebgp && !bgp->is_internal)
continue;
if (desc->validate)
{
errcode = desc->validate(bgp, z, l);
if (errcode > 0)
goto err;
if (errcode < 0)
continue;
}
type = desc->type;
}
else /* Unknown attribute */
{
if (!(flags & BAF_OPTIONAL))
{ errcode = 2; goto err; }
type = EAF_TYPE_OPAQUE;
}
seen[code/8] |= (1 << (code%8));
ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
ea->next = a->eattrs;
a->eattrs = ea;
ea->flags = 0;
ea->count = 1;
ea->attrs[0].id = EA_CODE(EAP_BGP, code);
ea->attrs[0].flags = flags;
ea->attrs[0].type = type;
if (type & EAF_EMBEDDED)
ad = NULL;
else
{
ad = lp_alloc(pool, sizeof(struct adata) + l);
ea->attrs[0].u.ptr = ad;
ad->length = l;
memcpy(ad->data, z, l);
}
switch (type)
{
case EAF_TYPE_ROUTER_ID:
case EAF_TYPE_INT:
if (l == 1)
ea->attrs[0].u.data = *z;
else
ea->attrs[0].u.data = get_u32(z);
break;
case EAF_TYPE_IP_ADDRESS:
ipa_ntoh(*(ip_addr *)ad->data);
break;
case EAF_TYPE_INT_SET:
{
u32 *z = (u32 *) ad->data;
for(i=0; i<ad->length/4; i++)
z[i] = ntohl(z[i]);
break;
}
}
}
#ifdef IPV6
if (seen[BA_MP_REACH_NLRI / 8] & (1 << (BA_MP_REACH_NLRI % 8)))
mandatory = 1;
#endif
/* Check if all mandatory attributes are present */
if (mandatory)
{
for(i=0; i < ARRAY_SIZE(bgp_mandatory_attrs); i++)
{
code = bgp_mandatory_attrs[i];
if (!(seen[code/8] & (1 << (code%8))))
{
bgp_error(conn, 3, 3, &bgp_mandatory_attrs[i], 1);
return NULL;
}
}
}
/* When receiving attributes from non-AS4-aware BGP speaker,
* we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
*/
if (bgp_as4_support && (! bgp->as4_session))
bgp_reconstruct_4b_atts(bgp, a, pool);
if (bgp_as4_support)
bgp_remove_as4_attrs(bgp, a);
/* If the AS path attribute contains our AS, reject the routes */
if (bgp_as_path_loopy(bgp, a))
goto loop;
/* Two checks for IBGP loops caused by route reflection, RFC 4456 */
if (bgp_originator_id_loopy(bgp, a) ||
bgp_cluster_list_loopy(bgp, a))
goto loop;
/* If there's no local preference, define one */
if (!(seen[0] & (1 << BA_LOCAL_PREF)))
bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, 0);
return a;
loop:
DBG("BGP: Path loop!\n");
return NULL;
malformed:
bgp_error(conn, 3, 1, NULL, 0);
return NULL;
err:
bgp_error(conn, 3, errcode, attr_start, z+l-attr_start);
return NULL;
}
int
bgp_get_attr(eattr *a, byte *buf, int buflen)
{
unsigned int i = EA_ID(a->id);
struct attr_desc *d;
if (ATTR_KNOWN(i))
{
d = &bgp_attr_table[i];
buf += bsprintf(buf, "%s", d->name);
if (d->format)
{
*buf++ = ':';
*buf++ = ' ';
d->format(a, buf, buflen);
return GA_FULL;
}
return GA_NAME;
}
bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
return GA_NAME;
}
void
bgp_attr_init(struct bgp_proto *p)
{
p->hash_size = 256;
p->hash_limit = p->hash_size * 4;
p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
init_list(&p->bucket_queue);
p->withdraw_bucket = NULL;
fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
}
void
bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
{
eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH));
eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
u32 origas;
buf += bsprintf(buf, " (%d) [", e->pref);
if (p && as_path_get_first(p->u.ptr, &origas))
buf += bsprintf(buf, "AS%u", origas);
if (o)
buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
strcpy(buf, "]");
}