82a79586e5
This patch extends the length for attributes from 1024 to 2048 (because both AS_PATH and AS4_PATH attributes take 2+4 B per AS). If there is not enough space for attributes, Bird skips that route group. Old behavior (skipping remaining attributes) leads to skipping required attributes and session drop.
1052 lines
25 KiB
C
1052 lines
25 KiB
C
/*
|
|
* BIRD -- BGP Packet Processing
|
|
*
|
|
* (c) 2000 Martin Mares <mj@ucw.cz>
|
|
*
|
|
* Can be freely distributed and used under the terms of the GNU GPL.
|
|
*/
|
|
|
|
#undef LOCAL_DEBUG
|
|
|
|
#include "nest/bird.h"
|
|
#include "nest/iface.h"
|
|
#include "nest/protocol.h"
|
|
#include "nest/route.h"
|
|
#include "nest/attrs.h"
|
|
#include "conf/conf.h"
|
|
#include "lib/unaligned.h"
|
|
#include "lib/socket.h"
|
|
|
|
#include "nest/cli.h"
|
|
|
|
#include "bgp.h"
|
|
|
|
static struct rate_limit rl_rcv_update, rl_snd_update;
|
|
|
|
static byte *
|
|
bgp_create_notification(struct bgp_conn *conn, byte *buf)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
|
|
BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
|
|
buf[0] = conn->notify_code;
|
|
buf[1] = conn->notify_subcode;
|
|
memcpy(buf+2, conn->notify_data, conn->notify_size);
|
|
return buf + 2 + conn->notify_size;
|
|
}
|
|
|
|
#ifdef IPV6
|
|
static byte *
|
|
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
|
|
{
|
|
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
|
|
*buf++ = 4; /* Capability data length */
|
|
*buf++ = 0; /* We support AF IPv6 */
|
|
*buf++ = BGP_AF_IPV6;
|
|
*buf++ = 0; /* RFU */
|
|
*buf++ = 1; /* and SAFI 1 */
|
|
return buf;
|
|
}
|
|
#endif
|
|
|
|
static byte *
|
|
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
|
|
{
|
|
*buf++ = 65; /* Capability 65: Support for 4-octet AS number */
|
|
*buf++ = 4; /* Capability data length */
|
|
put_u32(buf, conn->bgp->local_as);
|
|
return buf + 4;
|
|
}
|
|
|
|
static byte *
|
|
bgp_create_open(struct bgp_conn *conn, byte *buf)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
byte *cap;
|
|
int cap_len;
|
|
|
|
BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
|
|
BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
|
|
buf[0] = BGP_VERSION;
|
|
put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
|
|
put_u16(buf+3, p->cf->hold_time);
|
|
put_u32(buf+5, p->local_id);
|
|
|
|
if (conn->start_state == BSS_CONNECT_NOCAP)
|
|
{
|
|
BGP_TRACE(D_PACKETS, "Skipping capabilities");
|
|
buf[9] = 0;
|
|
return buf + 10;
|
|
}
|
|
|
|
/* Skipped 3 B for length field and Capabilities parameter header */
|
|
cap = buf + 12;
|
|
|
|
#ifdef IPV6
|
|
cap = bgp_put_cap_ipv6(conn, cap);
|
|
#endif
|
|
if (conn->want_as4_support)
|
|
cap = bgp_put_cap_as4(conn, cap);
|
|
|
|
cap_len = cap - buf - 12;
|
|
if (cap_len > 0)
|
|
{
|
|
buf[9] = cap_len + 2; /* Optional params len */
|
|
buf[10] = 2; /* Option: Capability list */
|
|
buf[11] = cap_len; /* Option length */
|
|
return cap;
|
|
}
|
|
else
|
|
{
|
|
buf[9] = 0; /* No optional parameters */
|
|
return buf + 10;
|
|
}
|
|
}
|
|
|
|
static unsigned int
|
|
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
|
|
{
|
|
byte *start = w;
|
|
ip_addr a;
|
|
int bytes;
|
|
|
|
while (!EMPTY_LIST(buck->prefixes) && remains >= 5)
|
|
{
|
|
struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
|
|
DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
|
|
*w++ = px->n.pxlen;
|
|
bytes = (px->n.pxlen + 7) / 8;
|
|
a = px->n.prefix;
|
|
ipa_hton(a);
|
|
memcpy(w, &a, bytes);
|
|
w += bytes;
|
|
remains -= bytes + 1;
|
|
rem_node(&px->bucket_node);
|
|
fib_delete(&p->prefix_fib, px);
|
|
}
|
|
return w - start;
|
|
}
|
|
|
|
static void
|
|
bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
|
|
{
|
|
while (!EMPTY_LIST(buck->prefixes))
|
|
{
|
|
struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
|
|
log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
|
|
rem_node(&px->bucket_node);
|
|
fib_delete(&p->prefix_fib, px);
|
|
}
|
|
}
|
|
|
|
#ifndef IPV6 /* IPv4 version */
|
|
|
|
static byte *
|
|
bgp_create_update(struct bgp_conn *conn, byte *buf)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
struct bgp_bucket *buck;
|
|
int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
|
|
byte *w;
|
|
int wd_size = 0;
|
|
int r_size = 0;
|
|
int a_size = 0;
|
|
|
|
w = buf+2;
|
|
if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
|
|
{
|
|
DBG("Withdrawn routes:\n");
|
|
wd_size = bgp_encode_prefixes(p, w, buck, remains);
|
|
w += wd_size;
|
|
remains -= wd_size;
|
|
}
|
|
put_u16(buf, wd_size);
|
|
|
|
if (remains >= 3072)
|
|
{
|
|
while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
|
|
{
|
|
if (EMPTY_LIST(buck->prefixes))
|
|
{
|
|
DBG("Deleting empty bucket %p\n", buck);
|
|
rem_node(&buck->send_node);
|
|
bgp_free_bucket(p, buck);
|
|
continue;
|
|
}
|
|
|
|
DBG("Processing bucket %p\n", buck);
|
|
a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048);
|
|
|
|
if (a_size < 0)
|
|
{
|
|
log(L_ERR "%s: Attribute list too long, skipping corresponding route group", p->p.name);
|
|
bgp_flush_prefixes(p, buck);
|
|
rem_node(&buck->send_node);
|
|
bgp_free_bucket(p, buck);
|
|
continue;
|
|
}
|
|
|
|
put_u16(w, a_size);
|
|
w += a_size + 2;
|
|
r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
|
|
w += r_size;
|
|
break;
|
|
}
|
|
}
|
|
if (!a_size) /* Attributes not already encoded */
|
|
{
|
|
put_u16(w, 0);
|
|
w += 2;
|
|
}
|
|
if (wd_size || r_size)
|
|
{
|
|
BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
|
|
return w;
|
|
}
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
#else /* IPv6 version */
|
|
|
|
static byte *
|
|
bgp_create_update(struct bgp_conn *conn, byte *buf)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
struct bgp_bucket *buck;
|
|
int size, is_ll;
|
|
int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
|
|
byte *w, *tmp, *tstart;
|
|
ip_addr ip, ip_ll;
|
|
ea_list *ea;
|
|
eattr *nh;
|
|
neighbor *n;
|
|
|
|
put_u16(buf, 0);
|
|
w = buf+4;
|
|
|
|
if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
|
|
{
|
|
DBG("Withdrawn routes:\n");
|
|
tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
|
|
*tmp++ = 0;
|
|
*tmp++ = BGP_AF_IPV6;
|
|
*tmp++ = 1;
|
|
ea->attrs[0].u.ptr->length = bgp_encode_prefixes(p, tmp, buck, remains-11);
|
|
size = bgp_encode_attrs(p, w, ea, remains);
|
|
ASSERT(size >= 0);
|
|
w += size;
|
|
remains -= size;
|
|
}
|
|
|
|
if (remains >= 3072)
|
|
{
|
|
while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
|
|
{
|
|
if (EMPTY_LIST(buck->prefixes))
|
|
{
|
|
DBG("Deleting empty bucket %p\n", buck);
|
|
rem_node(&buck->send_node);
|
|
bgp_free_bucket(p, buck);
|
|
continue;
|
|
}
|
|
|
|
DBG("Processing bucket %p\n", buck);
|
|
size = bgp_encode_attrs(p, w, buck->eattrs, 2048);
|
|
|
|
if (size < 0)
|
|
{
|
|
log(L_ERR "%s: Attribute list too long, ignoring corresponding route group", p->p.name);
|
|
bgp_flush_prefixes(p, buck);
|
|
rem_node(&buck->send_node);
|
|
bgp_free_bucket(p, buck);
|
|
continue;
|
|
}
|
|
|
|
w += size;
|
|
remains -= size;
|
|
tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
|
|
*tmp++ = 0;
|
|
*tmp++ = BGP_AF_IPV6;
|
|
*tmp++ = 1;
|
|
nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
|
|
ASSERT(nh);
|
|
ip = *(ip_addr *) nh->u.ptr->data;
|
|
is_ll = 0;
|
|
if (ipa_equal(ip, p->local_addr))
|
|
{
|
|
is_ll = 1;
|
|
ip_ll = p->local_link;
|
|
}
|
|
else
|
|
{
|
|
n = neigh_find(&p->p, &ip, 0);
|
|
if (n && n->iface == p->neigh->iface)
|
|
{
|
|
/* FIXME: We are assuming the global scope addresses use the lower 64 bits
|
|
* as an interface identifier which hasn't necessarily to be true.
|
|
*/
|
|
is_ll = 1;
|
|
ip_ll = ipa_or(ipa_build(0xfe800000,0,0,0), ipa_and(ip, ipa_build(0,0,~0,~0)));
|
|
}
|
|
}
|
|
if (is_ll)
|
|
{
|
|
*tmp++ = 32;
|
|
ipa_hton(ip);
|
|
memcpy(tmp, &ip, 16);
|
|
ipa_hton(ip_ll);
|
|
memcpy(tmp+16, &ip_ll, 16);
|
|
tmp += 32;
|
|
}
|
|
else
|
|
{
|
|
*tmp++ = 16;
|
|
ipa_hton(ip);
|
|
memcpy(tmp, &ip, 16);
|
|
tmp += 16;
|
|
}
|
|
*tmp++ = 0; /* No SNPA information */
|
|
tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
|
|
ea->attrs[0].u.ptr->length = tmp - tstart;
|
|
size = bgp_encode_attrs(p, w, ea, remains);
|
|
ASSERT(size >= 0);
|
|
w += size;
|
|
break;
|
|
}
|
|
}
|
|
|
|
size = w - (buf+4);
|
|
put_u16(buf+2, size);
|
|
lp_flush(bgp_linpool);
|
|
if (size)
|
|
{
|
|
BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
|
|
return w;
|
|
}
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
#endif
|
|
|
|
static void
|
|
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
|
|
{
|
|
memset(buf, 0xff, 16); /* Marker */
|
|
put_u16(buf+16, len);
|
|
buf[18] = type;
|
|
}
|
|
|
|
/**
|
|
* bgp_fire_tx - transmit packets
|
|
* @conn: connection
|
|
*
|
|
* Whenever the transmit buffers of the underlying TCP connection
|
|
* are free and we have any packets queued for sending, the socket functions
|
|
* call bgp_fire_tx() which takes care of selecting the highest priority packet
|
|
* queued (Notification > Keepalive > Open > Update), assembling its header
|
|
* and body and sending it to the connection.
|
|
*/
|
|
static int
|
|
bgp_fire_tx(struct bgp_conn *conn)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
unsigned int s = conn->packets_to_send;
|
|
sock *sk = conn->sk;
|
|
byte *buf, *pkt, *end;
|
|
int type;
|
|
|
|
if (!sk)
|
|
{
|
|
conn->packets_to_send = 0;
|
|
return 0;
|
|
}
|
|
buf = sk->tbuf;
|
|
pkt = buf + BGP_HEADER_LENGTH;
|
|
|
|
if (s & (1 << PKT_SCHEDULE_CLOSE))
|
|
{
|
|
/* We can finally close connection and enter idle state */
|
|
bgp_conn_enter_idle_state(conn);
|
|
return 0;
|
|
}
|
|
if (s & (1 << PKT_NOTIFICATION))
|
|
{
|
|
s = 1 << PKT_SCHEDULE_CLOSE;
|
|
type = PKT_NOTIFICATION;
|
|
end = bgp_create_notification(conn, pkt);
|
|
}
|
|
else if (s & (1 << PKT_KEEPALIVE))
|
|
{
|
|
s &= ~(1 << PKT_KEEPALIVE);
|
|
type = PKT_KEEPALIVE;
|
|
end = pkt; /* Keepalives carry no data */
|
|
BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
|
|
bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
|
|
}
|
|
else if (s & (1 << PKT_OPEN))
|
|
{
|
|
s &= ~(1 << PKT_OPEN);
|
|
type = PKT_OPEN;
|
|
end = bgp_create_open(conn, pkt);
|
|
}
|
|
else if (s & (1 << PKT_UPDATE))
|
|
{
|
|
end = bgp_create_update(conn, pkt);
|
|
type = PKT_UPDATE;
|
|
if (!end)
|
|
{
|
|
conn->packets_to_send = 0;
|
|
return 0;
|
|
}
|
|
}
|
|
else
|
|
return 0;
|
|
conn->packets_to_send = s;
|
|
bgp_create_header(buf, end - buf, type);
|
|
return sk_send(sk, end - buf);
|
|
}
|
|
|
|
/**
|
|
* bgp_schedule_packet - schedule a packet for transmission
|
|
* @conn: connection
|
|
* @type: packet type
|
|
*
|
|
* Schedule a packet of type @type to be sent as soon as possible.
|
|
*/
|
|
void
|
|
bgp_schedule_packet(struct bgp_conn *conn, int type)
|
|
{
|
|
DBG("BGP: Scheduling packet type %d\n", type);
|
|
conn->packets_to_send |= 1 << type;
|
|
if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
|
|
ev_schedule(conn->tx_ev);
|
|
}
|
|
|
|
void
|
|
bgp_kick_tx(void *vconn)
|
|
{
|
|
struct bgp_conn *conn = vconn;
|
|
|
|
DBG("BGP: kicking TX\n");
|
|
while (bgp_fire_tx(conn))
|
|
;
|
|
}
|
|
|
|
void
|
|
bgp_tx(sock *sk)
|
|
{
|
|
struct bgp_conn *conn = sk->data;
|
|
|
|
DBG("BGP: TX hook\n");
|
|
while (bgp_fire_tx(conn))
|
|
;
|
|
}
|
|
|
|
/* Capatibility negotiation as per RFC 2842 */
|
|
|
|
void
|
|
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
int cl;
|
|
u32 as;
|
|
|
|
while (len > 0)
|
|
{
|
|
if (len < 2 || len < 2 + opt[1])
|
|
goto err;
|
|
|
|
cl = opt[1];
|
|
|
|
switch (opt[0])
|
|
{
|
|
case 65:
|
|
if (cl != 4)
|
|
goto err;
|
|
conn->peer_as4_support = 1;
|
|
if (conn->want_as4_support)
|
|
conn->advertised_as = get_u32(opt + 2);
|
|
break;
|
|
|
|
/* We can safely ignore all other capabilities */
|
|
}
|
|
len -= 2 + cl;
|
|
opt += 2 + cl;
|
|
}
|
|
return;
|
|
|
|
err:
|
|
bgp_error(conn, 2, 0, NULL, 0);
|
|
return;
|
|
}
|
|
|
|
static int
|
|
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
|
|
{
|
|
int ol;
|
|
|
|
while (len > 0)
|
|
{
|
|
if (len < 2 || len < 2 + opt[1])
|
|
{ bgp_error(conn, 2, 0, NULL, 0); return 0; }
|
|
#ifdef LOCAL_DEBUG
|
|
{
|
|
int i;
|
|
DBG("\tOption %02x:", opt[0]);
|
|
for(i=0; i<opt[1]; i++)
|
|
DBG(" %02x", opt[2+i]);
|
|
DBG("\n");
|
|
}
|
|
#endif
|
|
|
|
ol = opt[1];
|
|
switch (opt[0])
|
|
{
|
|
case 2:
|
|
bgp_parse_capabilities(conn, opt + 2, ol);
|
|
break;
|
|
|
|
default:
|
|
/*
|
|
* BGP specs don't tell us to send which option
|
|
* we didn't recognize, but it's common practice
|
|
* to do so. Also, capability negotiation with
|
|
* Cisco routers doesn't work without that.
|
|
*/
|
|
bgp_error(conn, 2, 4, opt, ol);
|
|
return 0;
|
|
}
|
|
len -= 2 + ol;
|
|
opt += 2 + ol;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
|
|
{
|
|
struct bgp_conn *other;
|
|
struct bgp_proto *p = conn->bgp;
|
|
struct bgp_config *cf = p->cf;
|
|
unsigned hold;
|
|
u32 id;
|
|
|
|
/* Check state */
|
|
if (conn->state != BS_OPENSENT)
|
|
{ bgp_error(conn, 5, 0, NULL, 0); return; }
|
|
|
|
/* Check message contents */
|
|
if (len < 29 || len != 29 + pkt[28])
|
|
{ bgp_error(conn, 1, 2, pkt+16, 2); return; }
|
|
if (pkt[19] != BGP_VERSION)
|
|
{ bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
|
|
conn->advertised_as = get_u16(pkt+20);
|
|
hold = get_u16(pkt+22);
|
|
id = get_u32(pkt+24);
|
|
BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
|
|
|
|
if (bgp_parse_options(conn, pkt+29, pkt[28]))
|
|
return;
|
|
|
|
if (hold > 0 && hold < 3)
|
|
{ bgp_error(conn, 2, 6, pkt+22, 2); return; }
|
|
|
|
if (!id || id == 0xffffffff || id == p->local_id)
|
|
{ bgp_error(conn, 2, 3, pkt+24, -4); return; }
|
|
|
|
if (conn->advertised_as != p->remote_as)
|
|
{
|
|
bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return;
|
|
}
|
|
|
|
/* Check the other connection */
|
|
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
|
|
switch (other->state)
|
|
{
|
|
case BS_IDLE:
|
|
case BS_CONNECT:
|
|
case BS_ACTIVE:
|
|
case BS_OPENSENT:
|
|
case BS_CLOSE:
|
|
break;
|
|
case BS_OPENCONFIRM:
|
|
if ((p->local_id < id) == (conn == &p->incoming_conn))
|
|
{
|
|
/* Should close the other connection */
|
|
BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
|
|
bgp_error(other, 6, 0, NULL, 0);
|
|
break;
|
|
}
|
|
/* Fall thru */
|
|
case BS_ESTABLISHED:
|
|
/* Should close this connection */
|
|
BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
|
|
bgp_error(conn, 6, 0, NULL, 0);
|
|
return;
|
|
default:
|
|
bug("bgp_rx_open: Unknown state");
|
|
}
|
|
|
|
/* Update our local variables */
|
|
conn->hold_time = MIN(hold, p->cf->hold_time);
|
|
conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
|
|
p->remote_id = id;
|
|
p->as4_session = conn->want_as4_support && conn->peer_as4_support;
|
|
|
|
DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
|
|
|
|
bgp_schedule_packet(conn, PKT_KEEPALIVE);
|
|
bgp_start_timer(conn->hold_timer, conn->hold_time);
|
|
conn->state = BS_OPENCONFIRM;
|
|
}
|
|
|
|
#define DECODE_PREFIX(pp, ll) do { \
|
|
int b = *pp++; \
|
|
int q; \
|
|
ll--; \
|
|
if (b > BITS_PER_IP_ADDRESS) { err=10; goto bad; } \
|
|
q = (b+7) / 8; \
|
|
if (ll < q) { err=1; goto bad; } \
|
|
memcpy(&prefix, pp, q); \
|
|
pp += q; \
|
|
ll -= q; \
|
|
ipa_ntoh(prefix); \
|
|
prefix = ipa_and(prefix, ipa_mkmask(b)); \
|
|
pxlen = b; \
|
|
} while (0)
|
|
|
|
static inline int
|
|
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
|
|
{
|
|
neighbor *neigh;
|
|
ip_addr nexthop;
|
|
struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
|
|
ASSERT(nh);
|
|
nexthop = *(ip_addr *) nh->u.ptr->data;
|
|
neigh = neigh_find(&bgp->p, &nexthop, 0);
|
|
if (neigh)
|
|
{
|
|
if (neigh->scope == SCOPE_HOST)
|
|
{
|
|
DBG("BGP: Loop!\n");
|
|
return 0;
|
|
}
|
|
}
|
|
else
|
|
neigh = bgp->neigh;
|
|
a->gw = neigh->addr;
|
|
a->iface = neigh->iface;
|
|
return 1;
|
|
}
|
|
|
|
#ifndef IPV6 /* IPv4 version */
|
|
|
|
static void
|
|
bgp_do_rx_update(struct bgp_conn *conn,
|
|
byte *withdrawn, int withdrawn_len,
|
|
byte *nlri, int nlri_len,
|
|
byte *attrs, int attr_len)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
rta *a0;
|
|
rta *a = NULL;
|
|
ip_addr prefix;
|
|
net *n;
|
|
int err = 0, pxlen;
|
|
|
|
/* Withdraw routes */
|
|
while (withdrawn_len)
|
|
{
|
|
DECODE_PREFIX(withdrawn, withdrawn_len);
|
|
DBG("Withdraw %I/%d\n", prefix, pxlen);
|
|
if (n = net_find(p->p.table, prefix, pxlen))
|
|
rte_update(p->p.table, n, &p->p, NULL);
|
|
}
|
|
|
|
if (!attr_len && !nlri_len) /* shortcut */
|
|
return;
|
|
|
|
a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
|
|
if (a0 && nlri_len && bgp_get_nexthop(p, a0))
|
|
{
|
|
a = rta_lookup(a0);
|
|
while (nlri_len)
|
|
{
|
|
rte *e;
|
|
DECODE_PREFIX(nlri, nlri_len);
|
|
DBG("Add %I/%d\n", prefix, pxlen);
|
|
e = rte_get_temp(rta_clone(a));
|
|
n = net_get(p->p.table, prefix, pxlen);
|
|
e->net = n;
|
|
e->pflags = 0;
|
|
rte_update(p->p.table, n, &p->p, e);
|
|
}
|
|
}
|
|
bad:
|
|
if (a)
|
|
rta_free(a);
|
|
if (err)
|
|
bgp_error(conn, 3, err, NULL, 0);
|
|
return;
|
|
}
|
|
|
|
#else /* IPv6 version */
|
|
|
|
#define DO_NLRI(name) \
|
|
start = x = p->name##_start; \
|
|
len = len0 = p->name##_len; \
|
|
if (len) \
|
|
{ \
|
|
if (len < 3) goto bad; \
|
|
af = get_u16(x); \
|
|
sub = x[2]; \
|
|
x += 3; \
|
|
len -= 3; \
|
|
DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
|
|
} \
|
|
else \
|
|
af = 0; \
|
|
if (af == BGP_AF_IPV6)
|
|
|
|
static void
|
|
bgp_do_rx_update(struct bgp_conn *conn,
|
|
byte *withdrawn, int withdrawn_len,
|
|
byte *nlri, int nlri_len,
|
|
byte *attrs, int attr_len)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
byte *start, *x;
|
|
int len, len0;
|
|
unsigned af, sub;
|
|
rta *a0;
|
|
rta *a = NULL;
|
|
ip_addr prefix;
|
|
net *n;
|
|
rte e;
|
|
int err = 0, pxlen;
|
|
|
|
p->mp_reach_len = 0;
|
|
p->mp_unreach_len = 0;
|
|
a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
|
|
if (!a0)
|
|
return;
|
|
|
|
DO_NLRI(mp_unreach)
|
|
{
|
|
while (len)
|
|
{
|
|
DECODE_PREFIX(x, len);
|
|
DBG("Withdraw %I/%d\n", prefix, pxlen);
|
|
if (n = net_find(p->p.table, prefix, pxlen))
|
|
rte_update(p->p.table, n, &p->p, NULL);
|
|
}
|
|
}
|
|
|
|
DO_NLRI(mp_reach)
|
|
{
|
|
int i;
|
|
|
|
/* Create fake NEXT_HOP attribute */
|
|
if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
|
|
goto bad;
|
|
memcpy(bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, 16), x+1, 16);
|
|
len -= *x + 2;
|
|
x += *x + 1;
|
|
|
|
/* Ignore SNPA info */
|
|
i = *x++;
|
|
while (i--)
|
|
{
|
|
if (len < 1 || len < 1 + *x)
|
|
goto bad;
|
|
len -= *x + 1;
|
|
x += *x + 1;
|
|
}
|
|
|
|
if (bgp_get_nexthop(p, a0))
|
|
{
|
|
a = rta_lookup(a0);
|
|
while (len)
|
|
{
|
|
rte *e;
|
|
DECODE_PREFIX(x, len);
|
|
DBG("Add %I/%d\n", prefix, pxlen);
|
|
e = rte_get_temp(rta_clone(a));
|
|
n = net_get(p->p.table, prefix, pxlen);
|
|
e->net = n;
|
|
e->pflags = 0;
|
|
rte_update(p->p.table, n, &p->p, e);
|
|
}
|
|
rta_free(a);
|
|
}
|
|
}
|
|
|
|
return;
|
|
|
|
bad:
|
|
bgp_error(conn, 3, 9, start, len0);
|
|
if (a)
|
|
rta_free(a);
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
static void
|
|
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
byte *withdrawn, *attrs, *nlri;
|
|
int withdrawn_len, attr_len, nlri_len;
|
|
|
|
BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
|
|
|
|
if (conn->state != BS_ESTABLISHED)
|
|
{ bgp_error(conn, 5, 0, NULL, 0); return; }
|
|
bgp_start_timer(conn->hold_timer, conn->hold_time);
|
|
|
|
/* Find parts of the packet and check sizes */
|
|
if (len < 23)
|
|
{
|
|
bgp_error(conn, 1, 2, pkt+16, 2);
|
|
return;
|
|
}
|
|
withdrawn = pkt + 21;
|
|
withdrawn_len = get_u16(pkt + 19);
|
|
if (withdrawn_len + 23 > len)
|
|
goto malformed;
|
|
attrs = withdrawn + withdrawn_len + 2;
|
|
attr_len = get_u16(attrs - 2);
|
|
if (withdrawn_len + attr_len + 23 > len)
|
|
goto malformed;
|
|
nlri = attrs + attr_len;
|
|
nlri_len = len - withdrawn_len - attr_len - 23;
|
|
if (!attr_len && nlri_len)
|
|
goto malformed;
|
|
DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
|
|
|
|
lp_flush(bgp_linpool);
|
|
|
|
bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
|
|
return;
|
|
|
|
malformed:
|
|
bgp_error(conn, 3, 1, NULL, 0);
|
|
}
|
|
|
|
static struct {
|
|
byte major, minor;
|
|
byte *msg;
|
|
} bgp_msg_table[] = {
|
|
{ 1, 0, "Invalid message header" },
|
|
{ 1, 1, "Connection not synchronized" },
|
|
{ 1, 2, "Bad message length" },
|
|
{ 1, 3, "Bad message type" },
|
|
{ 2, 0, "Invalid OPEN message" },
|
|
{ 2, 1, "Unsupported version number" },
|
|
{ 2, 2, "Bad peer AS" },
|
|
{ 2, 3, "Bad BGP identifier" },
|
|
{ 2, 4, "Unsupported optional parameter" },
|
|
{ 2, 5, "Authentication failure" },
|
|
{ 2, 6, "Unacceptable hold time" },
|
|
{ 2, 7, "Required capability missing" }, /* [RFC3392] */
|
|
{ 3, 0, "Invalid UPDATE message" },
|
|
{ 3, 1, "Malformed attribute list" },
|
|
{ 3, 2, "Unrecognized well-known attribute" },
|
|
{ 3, 3, "Missing mandatory attribute" },
|
|
{ 3, 4, "Invalid attribute flags" },
|
|
{ 3, 5, "Invalid attribute length" },
|
|
{ 3, 6, "Invalid ORIGIN attribute" },
|
|
{ 3, 7, "AS routing loop" }, /* Deprecated */
|
|
{ 3, 8, "Invalid NEXT_HOP attribute" },
|
|
{ 3, 9, "Optional attribute error" },
|
|
{ 3, 10, "Invalid network field" },
|
|
{ 3, 11, "Malformed AS_PATH" },
|
|
{ 4, 0, "Hold timer expired" },
|
|
{ 5, 0, "Finite state machine error" },
|
|
{ 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
|
|
{ 6, 1, "Maximum number of prefixes reached" },
|
|
{ 6, 2, "Administrative shutdown" },
|
|
{ 6, 3, "Peer de-configured" },
|
|
{ 6, 4, "Administrative reset" },
|
|
{ 6, 5, "Connection rejected" },
|
|
{ 6, 6, "Other configuration change" },
|
|
{ 6, 7, "Connection collision resolution" },
|
|
{ 6, 8, "Out of Resources" }
|
|
};
|
|
|
|
/**
|
|
* bgp_error_dsc - return BGP error description
|
|
* @buff: temporary buffer
|
|
* @code: BGP error code
|
|
* @subcode: BGP error subcode
|
|
*
|
|
* bgp_error_dsc() returns error description for BGP errors
|
|
* which might be static string or given temporary buffer.
|
|
*/
|
|
const byte *
|
|
bgp_error_dsc(byte *buff, unsigned code, unsigned subcode)
|
|
{
|
|
unsigned i;
|
|
for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
|
|
if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
|
|
{
|
|
return bgp_msg_table[i].msg;
|
|
}
|
|
|
|
bsprintf(buff, "Unknown error %d.%d", code, subcode);
|
|
return buff;
|
|
}
|
|
|
|
void
|
|
bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
|
|
{
|
|
const byte *name;
|
|
byte namebuf[32];
|
|
byte *t, argbuf[36];
|
|
unsigned i;
|
|
|
|
if (code == 6 && !subcode) /* Don't report Cease messages */
|
|
return;
|
|
|
|
name = bgp_error_dsc(namebuf, code, subcode);
|
|
t = argbuf;
|
|
if (len)
|
|
{
|
|
*t++ = ':';
|
|
*t++ = ' ';
|
|
if (len > 16)
|
|
len = 16;
|
|
for (i=0; i<len; i++)
|
|
t += bsprintf(t, "%02x", data[i]);
|
|
}
|
|
*t = 0;
|
|
log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
|
|
}
|
|
|
|
static void
|
|
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
if (len < 21)
|
|
{
|
|
bgp_error(conn, 1, 2, pkt+16, 2);
|
|
return;
|
|
}
|
|
|
|
unsigned code = pkt[19];
|
|
unsigned subcode = pkt[20];
|
|
int delay = 1;
|
|
|
|
#ifndef IPV6
|
|
if ((code == 2) && ((subcode == 4) || (subcode == 7)))
|
|
{
|
|
/* Error related to capability:
|
|
* 4 - Peer does not support capabilities at all.
|
|
* 7 - Peer request some capability. Strange unless it is IPv6 only peer.
|
|
* We try connect without capabilities
|
|
*/
|
|
BGP_TRACE(D_EVENTS, "Capability related error received, capabilities disabled");
|
|
conn->bgp->start_state = BSS_CONNECT_NOCAP;
|
|
delay = 0;
|
|
}
|
|
#endif
|
|
|
|
bgp_log_error(conn->bgp, "Received error notification", code, subcode, pkt+21, len-21);
|
|
bgp_store_error(conn->bgp, conn, BE_BGP_RX, (code << 16) | subcode);
|
|
if (delay) bgp_update_startup_delay(conn->bgp, conn, code, subcode);
|
|
bgp_conn_enter_close_state(conn);
|
|
bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
|
|
}
|
|
|
|
static void
|
|
bgp_rx_keepalive(struct bgp_conn *conn)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
|
|
BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
|
|
bgp_start_timer(conn->hold_timer, conn->hold_time);
|
|
switch (conn->state)
|
|
{
|
|
case BS_OPENCONFIRM:
|
|
bgp_conn_enter_established_state(conn);
|
|
break;
|
|
case BS_ESTABLISHED:
|
|
break;
|
|
default:
|
|
bgp_error(conn, 5, 0, NULL, 0);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* bgp_rx_packet - handle a received packet
|
|
* @conn: BGP connection
|
|
* @pkt: start of the packet
|
|
* @len: packet size
|
|
*
|
|
* bgp_rx_packet() takes a newly received packet and calls the corresponding
|
|
* packet handler according to the packet type.
|
|
*/
|
|
static void
|
|
bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
|
|
{
|
|
DBG("BGP: Got packet %02x (%d bytes)\n", pkt[18], len);
|
|
switch (pkt[18])
|
|
{
|
|
case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
|
|
case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
|
|
case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
|
|
case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
|
|
default: bgp_error(conn, 1, 3, pkt+18, 1);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* bgp_rx - handle received data
|
|
* @sk: socket
|
|
* @size: amount of data received
|
|
*
|
|
* bgp_rx() is called by the socket layer whenever new data arrive from
|
|
* the underlying TCP connection. It assembles the data fragments to packets,
|
|
* checks their headers and framing and passes complete packets to
|
|
* bgp_rx_packet().
|
|
*/
|
|
int
|
|
bgp_rx(sock *sk, int size)
|
|
{
|
|
struct bgp_conn *conn = sk->data;
|
|
byte *pkt_start = sk->rbuf;
|
|
byte *end = pkt_start + size;
|
|
unsigned i, len;
|
|
|
|
DBG("BGP: RX hook: Got %d bytes\n", size);
|
|
while (end >= pkt_start + BGP_HEADER_LENGTH)
|
|
{
|
|
if ((conn->state == BS_CLOSE) || (conn->sk != sk))
|
|
return 0;
|
|
for(i=0; i<16; i++)
|
|
if (pkt_start[i] != 0xff)
|
|
{
|
|
bgp_error(conn, 1, 1, NULL, 0);
|
|
break;
|
|
}
|
|
len = get_u16(pkt_start+16);
|
|
if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
|
|
{
|
|
bgp_error(conn, 1, 2, pkt_start+16, 2);
|
|
break;
|
|
}
|
|
if (end < pkt_start + len)
|
|
break;
|
|
bgp_rx_packet(conn, pkt_start, len);
|
|
pkt_start += len;
|
|
}
|
|
if (pkt_start != sk->rbuf)
|
|
{
|
|
memmove(sk->rbuf, pkt_start, end - pkt_start);
|
|
sk->rpos = sk->rbuf + (end - pkt_start);
|
|
}
|
|
return 0;
|
|
}
|