bird/proto/babel/packets.c

1522 lines
37 KiB
C
Raw Normal View History

/*
* BIRD -- The Babel protocol
*
* Copyright (c) 2015--2016 Toke Hoiland-Jorgensen
* (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2016--2017 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*
* This file contains the packet and TLV handling code for the protocol.
*/
#include "babel.h"
struct babel_pkt_header {
u8 magic;
u8 version;
u16 length;
} PACKED;
struct babel_tlv {
u8 type;
u8 length;
u8 value[0];
} PACKED;
struct babel_tlv_ack_req {
u8 type;
u8 length;
u16 reserved;
u16 nonce;
u16 interval;
} PACKED;
struct babel_tlv_ack {
u8 type;
u8 length;
u16 nonce;
} PACKED;
struct babel_tlv_hello {
u8 type;
u8 length;
u16 flags;
u16 seqno;
u16 interval;
} PACKED;
struct babel_tlv_ihu {
u8 type;
u8 length;
u8 ae;
u8 reserved;
u16 rxcost;
u16 interval;
u8 addr[0];
} PACKED;
struct babel_tlv_router_id {
u8 type;
u8 length;
u16 reserved;
u64 router_id;
} PACKED;
struct babel_tlv_next_hop {
u8 type;
u8 length;
u8 ae;
u8 reserved;
u8 addr[0];
} PACKED;
struct babel_tlv_update {
u8 type;
u8 length;
u8 ae;
u8 flags;
u8 plen;
u8 omitted;
u16 interval;
u16 seqno;
u16 metric;
u8 addr[0];
} PACKED;
struct babel_tlv_route_request {
u8 type;
u8 length;
u8 ae;
u8 plen;
u8 addr[0];
} PACKED;
struct babel_tlv_seqno_request {
u8 type;
u8 length;
u8 ae;
u8 plen;
u16 seqno;
u8 hop_count;
u8 reserved;
u64 router_id;
u8 addr[0];
} PACKED;
struct babel_subtlv_source_prefix {
u8 type;
u8 length;
u8 plen;
u8 addr[0];
} PACKED;
/* Hello flags */
#define BABEL_HF_UNICAST 0x8000
/* Update flags */
#define BABEL_UF_DEF_PREFIX 0x80
#define BABEL_UF_ROUTER_ID 0x40
struct babel_parse_state {
struct babel_proto *proto;
struct babel_iface *ifa;
ip_addr saddr;
ip_addr next_hop_ip4;
ip_addr next_hop_ip6;
u64 router_id; /* Router ID used in subsequent updates */
u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */
u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix in network order */
u8 router_id_seen; /* router_id field is valid */
u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */
u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */
u8 current_tlv_endpos; /* End of self-terminating TLVs (offset from start) */
u8 sadr_enabled;
};
enum parse_result {
PARSE_SUCCESS,
PARSE_ERROR,
PARSE_IGNORE,
};
struct babel_write_state {
u64 router_id;
u8 router_id_seen;
ip_addr next_hop_ip4;
ip_addr next_hop_ip6;
u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */
u8 def_ip6_pxlen;
};
#define DROP(DSC,VAL) do { err_dsc = DSC; err_val = VAL; goto drop; } while(0)
#define DROP1(DSC) do { err_dsc = DSC; goto drop; } while(0)
#define LOG_PKT(msg, args...) \
log_rl(&p->log_pkt_tbf, L_REMOTE "%s: " msg, p->p.name, args)
#define FIRST_TLV(p) ((struct babel_tlv *) (((struct babel_pkt_header *) p) + 1))
#define NEXT_TLV(t) ((struct babel_tlv *) (((byte *) t) + TLV_LENGTH(t)))
#define TLV_LENGTH(t) (t->type == BABEL_TLV_PAD1 ? 1 : t->length + sizeof(struct babel_tlv))
#define TLV_OPT_LENGTH(t) (t->length + sizeof(struct babel_tlv) - sizeof(*t))
#define TLV_HDR(tlv,t,l) ({ tlv->type = t; tlv->length = l - sizeof(struct babel_tlv); })
#define TLV_HDR0(tlv,t) TLV_HDR(tlv, t, tlv_data[t].min_length)
2016-12-10 07:11:26 +08:00
#define NET_SIZE(n) BYTES(net_pxlen(n))
static inline uint
bytes_equal(u8 *b1, u8 *b2, uint maxlen)
{
uint i;
for (i = 0; (i < maxlen) && (*b1 == *b2); i++, b1++, b2++)
;
return i;
}
2017-10-13 18:34:08 +08:00
static inline uint
get_time16(const void *p)
{
2017-10-13 18:34:08 +08:00
uint v = get_u16(p) * BABEL_TIME_UNITS;
return MAX(BABEL_MIN_INTERVAL, v);
}
static inline void
2017-10-13 18:34:08 +08:00
put_time16(void *p, uint v)
{
2017-10-13 18:34:08 +08:00
put_u16(p, v / BABEL_TIME_UNITS);
}
static inline void
read_ip4_px(net_addr *n, const void *p, uint plen)
{
ip4_addr addr = {0};
memcpy(&addr, p, BYTES(plen));
net_fill_ip4(n, ip4_ntoh(addr), plen);
}
static inline void
put_ip4_px(void *p, net_addr *n)
{
ip4_addr addr = ip4_hton(net4_prefix(n));
memcpy(p, &addr, NET_SIZE(n));
}
2016-12-10 07:11:26 +08:00
static inline void
read_ip6_px(net_addr *n, const void *p, uint plen)
{
ip6_addr addr = IPA_NONE;
memcpy(&addr, p, BYTES(plen));
2016-12-10 07:11:26 +08:00
net_fill_ip6(n, ip6_ntoh(addr), plen);
}
static inline void
2016-12-10 07:11:26 +08:00
put_ip6_px(void *p, net_addr *n)
{
2016-12-10 07:11:26 +08:00
ip6_addr addr = ip6_hton(net6_prefix(n));
memcpy(p, &addr, NET_SIZE(n));
}
static inline ip6_addr
get_ip6_ll(const void *p)
{
return ip6_build(0xfe800000, 0, get_u32(p+0), get_u32(p+4));
}
static inline void
put_ip6_ll(void *p, ip6_addr addr)
{
put_u32(p+0, _I2(addr));
put_u32(p+4, _I3(addr));
}
/*
* TLV read/write functions
*/
static int babel_read_ack_req(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_ihu(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_router_id(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_update(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_source_prefix(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static uint babel_write_ack(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static uint babel_write_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static uint babel_write_ihu(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static uint babel_write_update(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static uint babel_write_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static uint babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static int babel_write_source_prefix(struct babel_tlv *hdr, net_addr *net, uint max_len);
struct babel_tlv_data {
u8 min_length;
int (*read_tlv)(struct babel_tlv *hdr, union babel_msg *m, struct babel_parse_state *state);
uint (*write_tlv)(struct babel_tlv *hdr, union babel_msg *m, struct babel_write_state *state, uint max_len);
void (*handle_tlv)(union babel_msg *m, struct babel_iface *ifa);
};
static const struct babel_tlv_data tlv_data[BABEL_TLV_MAX] = {
[BABEL_TLV_ACK_REQ] = {
sizeof(struct babel_tlv_ack_req),
babel_read_ack_req,
NULL,
babel_handle_ack_req
},
[BABEL_TLV_ACK] = {
sizeof(struct babel_tlv_ack),
NULL,
babel_write_ack,
NULL
},
[BABEL_TLV_HELLO] = {
sizeof(struct babel_tlv_hello),
babel_read_hello,
babel_write_hello,
babel_handle_hello
},
[BABEL_TLV_IHU] = {
sizeof(struct babel_tlv_ihu),
babel_read_ihu,
babel_write_ihu,
babel_handle_ihu
},
[BABEL_TLV_ROUTER_ID] = {
sizeof(struct babel_tlv_router_id),
babel_read_router_id,
NULL,
NULL
},
[BABEL_TLV_NEXT_HOP] = {
sizeof(struct babel_tlv_next_hop),
babel_read_next_hop,
NULL,
NULL
},
[BABEL_TLV_UPDATE] = {
sizeof(struct babel_tlv_update),
babel_read_update,
babel_write_update,
babel_handle_update
},
[BABEL_TLV_ROUTE_REQUEST] = {
sizeof(struct babel_tlv_route_request),
babel_read_route_request,
babel_write_route_request,
babel_handle_route_request
},
[BABEL_TLV_SEQNO_REQUEST] = {
sizeof(struct babel_tlv_seqno_request),
babel_read_seqno_request,
babel_write_seqno_request,
babel_handle_seqno_request
},
};
static int
babel_read_ack_req(struct babel_tlv *hdr, union babel_msg *m,
struct babel_parse_state *state)
{
struct babel_tlv_ack_req *tlv = (void *) hdr;
struct babel_msg_ack_req *msg = &m->ack_req;
msg->type = BABEL_TLV_ACK_REQ;
msg->nonce = get_u16(&tlv->nonce);
msg->interval = get_time16(&tlv->interval);
msg->sender = state->saddr;
if (!msg->interval)
return PARSE_ERROR;
return PARSE_SUCCESS;
}
static uint
babel_write_ack(struct babel_tlv *hdr, union babel_msg *m,
struct babel_write_state *state UNUSED, uint max_len UNUSED)
{
struct babel_tlv_ack *tlv = (void *) hdr;
struct babel_msg_ack *msg = &m->ack;
TLV_HDR0(tlv, BABEL_TLV_ACK);
put_u16(&tlv->nonce, msg->nonce);
return sizeof(struct babel_tlv_ack);
}
static int
babel_read_hello(struct babel_tlv *hdr, union babel_msg *m,
struct babel_parse_state *state)
{
struct babel_tlv_hello *tlv = (void *) hdr;
struct babel_msg_hello *msg = &m->hello;
/* We currently don't support unicast Hello */
u16 flags = get_u16(&tlv->flags);
if (flags & BABEL_HF_UNICAST)
return PARSE_IGNORE;
msg->type = BABEL_TLV_HELLO;
msg->seqno = get_u16(&tlv->seqno);
msg->interval = get_time16(&tlv->interval);
msg->sender = state->saddr;
return PARSE_SUCCESS;
}
static uint
babel_write_hello(struct babel_tlv *hdr, union babel_msg *m,
struct babel_write_state *state UNUSED, uint max_len UNUSED)
{
struct babel_tlv_hello *tlv = (void *) hdr;
struct babel_msg_hello *msg = &m->hello;
TLV_HDR0(tlv, BABEL_TLV_HELLO);
put_u16(&tlv->seqno, msg->seqno);
put_time16(&tlv->interval, msg->interval);
return sizeof(struct babel_tlv_hello);
}
static int
babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m,
struct babel_parse_state *state)
{
struct babel_tlv_ihu *tlv = (void *) hdr;
struct babel_msg_ihu *msg = &m->ihu;
msg->type = BABEL_TLV_IHU;
msg->ae = tlv->ae;
msg->rxcost = get_u16(&tlv->rxcost);
msg->interval = get_time16(&tlv->interval);
msg->addr = IPA_NONE;
msg->sender = state->saddr;
if (msg->ae >= BABEL_AE_MAX)
return PARSE_IGNORE;
/*
* We only actually read link-local IPs. In every other case, the addr field
* will be 0 but validation will succeed. The handler takes care of these
* cases. We handle them here anyway because we need the length for parsing
* subtlvs.
*/
switch (msg->ae)
{
case BABEL_AE_IP4:
if (TLV_OPT_LENGTH(tlv) < 4)
return PARSE_ERROR;
state->current_tlv_endpos += 4;
break;
case BABEL_AE_IP6:
if (TLV_OPT_LENGTH(tlv) < 16)
return PARSE_ERROR;
state->current_tlv_endpos += 16;
break;
case BABEL_AE_IP6_LL:
if (TLV_OPT_LENGTH(tlv) < 8)
return PARSE_ERROR;
msg->addr = ipa_from_ip6(get_ip6_ll(&tlv->addr));
state->current_tlv_endpos += 8;
break;
}
return PARSE_SUCCESS;
}
static uint
babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m,
struct babel_write_state *state UNUSED, uint max_len)
{
struct babel_tlv_ihu *tlv = (void *) hdr;
struct babel_msg_ihu *msg = &m->ihu;
if (ipa_is_link_local(msg->addr) && max_len < sizeof(struct babel_tlv_ihu) + 8)
return 0;
TLV_HDR0(tlv, BABEL_TLV_IHU);
put_u16(&tlv->rxcost, msg->rxcost);
put_time16(&tlv->interval, msg->interval);
if (!ipa_is_link_local(msg->addr))
{
tlv->ae = BABEL_AE_WILDCARD;
return sizeof(struct babel_tlv_ihu);
}
put_ip6_ll(&tlv->addr, msg->addr);
tlv->ae = BABEL_AE_IP6_LL;
hdr->length += 8;
return sizeof(struct babel_tlv_ihu) + 8;
}
static int
babel_read_router_id(struct babel_tlv *hdr, union babel_msg *m UNUSED,
struct babel_parse_state *state)
{
struct babel_tlv_router_id *tlv = (void *) hdr;
state->router_id = get_u64(&tlv->router_id);
state->router_id_seen = 1;
return PARSE_IGNORE;
}
/* This is called directly from babel_write_update() */
static uint
babel_write_router_id(struct babel_tlv *hdr, u64 router_id,
struct babel_write_state *state, uint max_len UNUSED)
{
struct babel_tlv_router_id *tlv = (void *) hdr;
/* We still assume that first min_length bytes are available and zeroed */
TLV_HDR0(tlv, BABEL_TLV_ROUTER_ID);
put_u64(&tlv->router_id, router_id);
state->router_id = router_id;
state->router_id_seen = 1;
return sizeof(struct babel_tlv_router_id);
}
static int
babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED,
struct babel_parse_state *state)
{
struct babel_tlv_next_hop *tlv = (void *) hdr;
switch (tlv->ae)
{
case BABEL_AE_WILDCARD:
return PARSE_ERROR;
case BABEL_AE_IP4:
if (TLV_OPT_LENGTH(tlv) < sizeof(ip4_addr))
return PARSE_ERROR;
state->next_hop_ip4 = ipa_from_ip4(get_ip4(&tlv->addr));
state->current_tlv_endpos += sizeof(ip4_addr);
return PARSE_IGNORE;
case BABEL_AE_IP6:
if (TLV_OPT_LENGTH(tlv) < sizeof(ip6_addr))
return PARSE_ERROR;
state->next_hop_ip6 = ipa_from_ip6(get_ip6(&tlv->addr));
state->current_tlv_endpos += sizeof(ip6_addr);
return PARSE_IGNORE;
case BABEL_AE_IP6_LL:
if (TLV_OPT_LENGTH(tlv) < 8)
return PARSE_ERROR;
state->next_hop_ip6 = ipa_from_ip6(get_ip6_ll(&tlv->addr));
state->current_tlv_endpos += 8;
return PARSE_IGNORE;
default:
return PARSE_IGNORE;
}
return PARSE_IGNORE;
}
/* This is called directly from babel_write_update() and returns -1 if a next
hop should be written but there is not enough space. */
static int
babel_write_next_hop(struct babel_tlv *hdr, ip_addr addr,
struct babel_write_state *state, uint max_len)
{
struct babel_tlv_next_hop *tlv = (void *) hdr;
if (ipa_zero(addr))
{
/* Should not happen */
return 0;
}
else if (ipa_is_ip4(addr) && !ipa_equal(addr, state->next_hop_ip4))
{
uint len = sizeof(struct babel_tlv_next_hop) + sizeof(ip4_addr);
if (len > max_len)
return -1;
TLV_HDR(tlv, BABEL_TLV_NEXT_HOP, len);
tlv->ae = BABEL_AE_IP4;
put_ip4(&tlv->addr, ipa_to_ip4(addr));
state->next_hop_ip4 = addr;
return len;
}
else if (ipa_is_ip6(addr) && !ipa_equal(addr, state->next_hop_ip6))
{
uint len = sizeof(struct babel_tlv_next_hop) + sizeof(ip6_addr);
if (len > max_len)
return -1;
TLV_HDR(tlv, BABEL_TLV_NEXT_HOP, len);
tlv->ae = BABEL_AE_IP6;
put_ip6(&tlv->addr, ipa_to_ip6(addr));
state->next_hop_ip6 = addr;
return len;
}
return 0;
}
static int
babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
struct babel_parse_state *state)
{
struct babel_tlv_update *tlv = (void *) hdr;
struct babel_msg_update *msg = &m->update;
msg->type = BABEL_TLV_UPDATE;
msg->interval = get_time16(&tlv->interval);
msg->seqno = get_u16(&tlv->seqno);
msg->metric = get_u16(&tlv->metric);
/* Length of received prefix data without omitted part */
int len = BYTES(tlv->plen) - (int) tlv->omitted;
u8 buf[16] = {};
if ((len < 0) || ((uint) len > TLV_OPT_LENGTH(tlv)))
return PARSE_ERROR;
switch (tlv->ae)
{
case BABEL_AE_WILDCARD:
if (tlv->plen > 0)
return PARSE_ERROR;
2016-12-10 07:11:26 +08:00
if (msg->metric != 65535)
return PARSE_ERROR;
msg->wildcard = 1;
break;
case BABEL_AE_IP4:
if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
/* Cannot omit data if there is no saved prefix */
if (tlv->omitted && !state->def_ip4_prefix_seen)
return PARSE_ERROR;
/* Update must have next hop, unless it is retraction */
if (ipa_zero(state->next_hop_ip4) && (msg->metric != BABEL_INFINITY))
return PARSE_ERROR;
/* Merge saved prefix and received prefix parts */
memcpy(buf, state->def_ip4_prefix, tlv->omitted);
memcpy(buf + tlv->omitted, tlv->addr, len);
ip4_addr prefix4 = get_ip4(buf);
net_fill_ip4(&msg->net, prefix4, tlv->plen);
if (tlv->flags & BABEL_UF_DEF_PREFIX)
{
put_ip4(state->def_ip4_prefix, prefix4);
state->def_ip4_prefix_seen = 1;
}
msg->next_hop = state->next_hop_ip4;
break;
case BABEL_AE_IP6:
2016-12-10 07:11:26 +08:00
if (tlv->plen > IP6_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
/* Cannot omit data if there is no saved prefix */
if (tlv->omitted && !state->def_ip6_prefix_seen)
return PARSE_ERROR;
/* Merge saved prefix and received prefix parts */
memcpy(buf, state->def_ip6_prefix, tlv->omitted);
memcpy(buf + tlv->omitted, tlv->addr, len);
ip6_addr prefix6 = get_ip6(buf);
net_fill_ip6(&msg->net, prefix6, tlv->plen);
if (state->sadr_enabled)
net_make_ip6_sadr(&msg->net);
if (tlv->flags & BABEL_UF_DEF_PREFIX)
{
put_ip6(state->def_ip6_prefix, prefix6);
state->def_ip6_prefix_seen = 1;
}
if (tlv->flags & BABEL_UF_ROUTER_ID)
{
state->router_id = ((u64) _I2(prefix6)) << 32 | _I3(prefix6);
state->router_id_seen = 1;
}
msg->next_hop = state->next_hop_ip6;
break;
case BABEL_AE_IP6_LL:
/* ??? */
return PARSE_IGNORE;
default:
return PARSE_IGNORE;
}
/* Update must have Router ID, unless it is retraction */
if (!state->router_id_seen && (msg->metric != BABEL_INFINITY))
{
DBG("Babel: No router ID seen before update\n");
return PARSE_ERROR;
}
msg->router_id = state->router_id;
msg->sender = state->saddr;
state->current_tlv_endpos += len;
return PARSE_SUCCESS;
}
static uint
babel_write_update(struct babel_tlv *hdr, union babel_msg *m,
struct babel_write_state *state, uint max_len)
{
struct babel_msg_update *msg = &m->update;
uint len0 = 0;
/*
* When needed, we write Router-ID TLV before Update TLV and return size of
* both of them. There is enough space for the Router-ID TLV, because
* sizeof(struct babel_tlv_router_id) == sizeof(struct babel_tlv_update).
*
* Router ID is not used for retractions, so do not use it in such case.
*/
if ((msg->metric < BABEL_INFINITY) &&
(!state->router_id_seen || (msg->router_id != state->router_id)))
{
len0 = babel_write_router_id(hdr, msg->router_id, state, max_len);
hdr = NEXT_TLV(hdr);
}
/*
* We also may add Next Hop TLV for regular updates. It may fail for not
* enough space or it may be unnecessary as the next hop is the same as the
* last one already announced. So we handle all three cases.
*/
if (msg->metric < BABEL_INFINITY)
{
int l = babel_write_next_hop(hdr, msg->next_hop, state, max_len - len0);
if (l < 0)
return 0;
if (l)
{
len0 += l;
hdr = NEXT_TLV(hdr);
}
}
struct babel_tlv_update *tlv = (void *) hdr;
2016-12-10 07:11:26 +08:00
uint len = sizeof(struct babel_tlv_update) + NET_SIZE(&msg->net);
if (len0 + len > max_len)
return 0;
memset(tlv, 0, sizeof(struct babel_tlv_update));
TLV_HDR(tlv, BABEL_TLV_UPDATE, len);
if (msg->wildcard)
{
tlv->ae = BABEL_AE_WILDCARD;
tlv->plen = 0;
}
else if (msg->net.type == NET_IP4)
{
tlv->ae = BABEL_AE_IP4;
tlv->plen = net4_pxlen(&msg->net);
put_ip4_px(tlv->addr, &msg->net);
}
else
{
tlv->ae = BABEL_AE_IP6;
2016-12-10 07:11:26 +08:00
tlv->plen = net6_pxlen(&msg->net);
/* Address compression - omit initial matching bytes */
u8 buf[16], omit;
put_ip6(buf, net6_prefix(&msg->net));
omit = bytes_equal(buf, state->def_ip6_prefix,
MIN(tlv->plen, state->def_ip6_pxlen) / 8);
if (omit > 0)
{
memcpy(tlv->addr, buf + omit, NET_SIZE(&msg->net) - omit);
tlv->omitted = omit;
tlv->length -= omit;
len -= omit;
}
else
{
put_ip6_px(tlv->addr, &msg->net);
tlv->flags |= BABEL_UF_DEF_PREFIX;
put_ip6(state->def_ip6_prefix, net6_prefix(&msg->net));
state->def_ip6_pxlen = tlv->plen;
}
}
put_time16(&tlv->interval, msg->interval);
put_u16(&tlv->seqno, msg->seqno);
put_u16(&tlv->metric, msg->metric);
if (msg->net.type == NET_IP6_SADR)
{
int l = babel_write_source_prefix(hdr, &msg->net, max_len - (len0 + len));
if (l < 0)
return 0;
len += l;
}
return len0 + len;
}
static int
babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m,
struct babel_parse_state *state)
{
struct babel_tlv_route_request *tlv = (void *) hdr;
struct babel_msg_route_request *msg = &m->route_request;
msg->type = BABEL_TLV_ROUTE_REQUEST;
switch (tlv->ae)
{
case BABEL_AE_WILDCARD:
/* Wildcard requests must have plen 0 */
if (tlv->plen > 0)
return PARSE_ERROR;
msg->full = 1;
return PARSE_SUCCESS;
case BABEL_AE_IP4:
if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen))
return PARSE_ERROR;
read_ip4_px(&msg->net, tlv->addr, tlv->plen);
state->current_tlv_endpos += BYTES(tlv->plen);
return PARSE_SUCCESS;
case BABEL_AE_IP6:
2016-12-10 07:11:26 +08:00
if (tlv->plen > IP6_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen))
return PARSE_ERROR;
2016-12-10 07:11:26 +08:00
read_ip6_px(&msg->net, tlv->addr, tlv->plen);
state->current_tlv_endpos += BYTES(tlv->plen);
if (state->sadr_enabled)
net_make_ip6_sadr(&msg->net);
return PARSE_SUCCESS;
case BABEL_AE_IP6_LL:
return PARSE_ERROR;
default:
return PARSE_IGNORE;
}
return PARSE_IGNORE;
}
static uint
babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m,
struct babel_write_state *state UNUSED, uint max_len)
{
struct babel_tlv_route_request *tlv = (void *) hdr;
struct babel_msg_route_request *msg = &m->route_request;
2016-12-10 07:11:26 +08:00
uint len = sizeof(struct babel_tlv_route_request) + NET_SIZE(&msg->net);
if (len > max_len)
return 0;
TLV_HDR(tlv, BABEL_TLV_ROUTE_REQUEST, len);
if (msg->full)
{
tlv->ae = BABEL_AE_WILDCARD;
tlv->plen = 0;
}
else if (msg->net.type == NET_IP4)
{
tlv->ae = BABEL_AE_IP4;
tlv->plen = net4_pxlen(&msg->net);
put_ip4_px(tlv->addr, &msg->net);
}
else
{
tlv->ae = BABEL_AE_IP6;
2016-12-10 07:11:26 +08:00
tlv->plen = net6_pxlen(&msg->net);
put_ip6_px(tlv->addr, &msg->net);
}
if (msg->net.type == NET_IP6_SADR)
{
int l = babel_write_source_prefix(hdr, &msg->net, max_len - len);
if (l < 0)
return 0;
len += l;
}
return len;
}
static int
babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *m,
struct babel_parse_state *state)
{
struct babel_tlv_seqno_request *tlv = (void *) hdr;
struct babel_msg_seqno_request *msg = &m->seqno_request;
msg->type = BABEL_TLV_SEQNO_REQUEST;
msg->seqno = get_u16(&tlv->seqno);
msg->hop_count = tlv->hop_count;
msg->router_id = get_u64(&tlv->router_id);
msg->sender = state->saddr;
if (tlv->hop_count == 0)
return PARSE_ERROR;
switch (tlv->ae)
{
case BABEL_AE_WILDCARD:
return PARSE_ERROR;
case BABEL_AE_IP4:
if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen))
return PARSE_ERROR;
read_ip4_px(&msg->net, tlv->addr, tlv->plen);
state->current_tlv_endpos += BYTES(tlv->plen);
return PARSE_SUCCESS;
case BABEL_AE_IP6:
2016-12-10 07:11:26 +08:00
if (tlv->plen > IP6_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen))
return PARSE_ERROR;
2016-12-10 07:11:26 +08:00
read_ip6_px(&msg->net, tlv->addr, tlv->plen);
state->current_tlv_endpos += BYTES(tlv->plen);
if (state->sadr_enabled)
net_make_ip6_sadr(&msg->net);
return PARSE_SUCCESS;
case BABEL_AE_IP6_LL:
return PARSE_ERROR;
default:
return PARSE_IGNORE;
}
return PARSE_IGNORE;
}
static uint
babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *m,
struct babel_write_state *state UNUSED, uint max_len)
{
struct babel_tlv_seqno_request *tlv = (void *) hdr;
struct babel_msg_seqno_request *msg = &m->seqno_request;
2016-12-10 07:11:26 +08:00
uint len = sizeof(struct babel_tlv_seqno_request) + NET_SIZE(&msg->net);
if (len > max_len)
return 0;
TLV_HDR(tlv, BABEL_TLV_SEQNO_REQUEST, len);
if (msg->net.type == NET_IP4)
{
tlv->ae = BABEL_AE_IP4;
tlv->plen = net4_pxlen(&msg->net);
put_ip4_px(tlv->addr, &msg->net);
}
else
{
tlv->ae = BABEL_AE_IP6;
tlv->plen = net6_pxlen(&msg->net);
put_ip6_px(tlv->addr, &msg->net);
}
put_u16(&tlv->seqno, msg->seqno);
tlv->hop_count = msg->hop_count;
put_u64(&tlv->router_id, msg->router_id);
if (msg->net.type == NET_IP6_SADR)
{
int l = babel_write_source_prefix(hdr, &msg->net, max_len - len);
if (l < 0)
return 0;
len += l;
}
return len;
}
static int
babel_read_source_prefix(struct babel_tlv *hdr, union babel_msg *msg,
struct babel_parse_state *state UNUSED)
{
struct babel_subtlv_source_prefix *tlv = (void *) hdr;
net_addr_ip6_sadr *net;
/*
* We would like to skip the sub-TLV if SADR is not enabled, but we do not
* know AF of the enclosing TLV yet. We will do that later.
*/
/* Check internal consistency */
if ((tlv->length < 1) ||
(tlv->plen > IP6_MAX_PREFIX_LENGTH) ||
(tlv->length < (1 + BYTES(tlv->plen))))
return PARSE_ERROR;
/* Plen MUST NOT be 0 */
if (tlv->plen == 0)
return PARSE_ERROR;
switch(msg->type)
{
case BABEL_TLV_UPDATE:
/* Wildcard updates with source prefix MUST be silently ignored */
if (msg->update.wildcard)
return PARSE_IGNORE;
net = (void *) &msg->update.net;
break;
case BABEL_TLV_ROUTE_REQUEST:
/* Wildcard requests with source addresses MUST be silently ignored */
if (msg->route_request.full)
return PARSE_IGNORE;
net = (void *) &msg->route_request.net;
break;
case BABEL_TLV_SEQNO_REQUEST:
net = (void *) &msg->seqno_request.net;
break;
default:
return PARSE_ERROR;
}
/* If SADR is active, the net has appropriate type */
if (net->type != NET_IP6_SADR)
return PARSE_IGNORE;
/* Duplicate Source Prefix sub-TLV; SHOULD ignore whole TLV */
if (net->src_pxlen > 0)
return PARSE_IGNORE;
net_addr_ip6 src;
read_ip6_px((void *) &src, tlv->addr, tlv->plen);
net->src_prefix = src.prefix;
net->src_pxlen = src.pxlen;
return PARSE_SUCCESS;
}
static int
babel_write_source_prefix(struct babel_tlv *hdr, net_addr *n, uint max_len)
{
struct babel_subtlv_source_prefix *tlv = (void *) NEXT_TLV(hdr);
net_addr_ip6_sadr *net = (void *) n;
/* Do not use this sub-TLV for default prefix */
if (net->src_pxlen == 0)
return 0;
uint len = sizeof(*tlv) + BYTES(net->src_pxlen);
if (len > max_len)
return -1;
TLV_HDR(tlv, BABEL_SUBTLV_SOURCE_PREFIX, len);
hdr->length += len;
net_addr_ip6 src = NET_ADDR_IP6(net->src_prefix, net->src_pxlen);
tlv->plen = src.pxlen;
put_ip6_px(tlv->addr, (void *) &src);
return len;
}
static inline int
babel_read_subtlvs(struct babel_tlv *hdr,
union babel_msg *msg,
struct babel_parse_state *state)
{
struct babel_tlv *tlv;
byte *pos, *end = (byte *) hdr + TLV_LENGTH(hdr);
int res;
for (tlv = (void *) hdr + state->current_tlv_endpos;
(byte *) tlv < end;
tlv = NEXT_TLV(tlv))
{
/* Ugly special case */
if (tlv->type == BABEL_TLV_PAD1)
continue;
/* The end of the common TLV header */
pos = (byte *)tlv + sizeof(struct babel_tlv);
if ((pos > end) || (pos + tlv->length > end))
return PARSE_ERROR;
/*
* The subtlv type space is non-contiguous (due to the mandatory bit), so
* use a switch for dispatch instead of the mapping array we use for TLVs
*/
switch (tlv->type)
{
case BABEL_SUBTLV_SOURCE_PREFIX:
res = babel_read_source_prefix(tlv, msg, state);
if (res != PARSE_SUCCESS)
return res;
break;
case BABEL_SUBTLV_PADN:
default:
/* Unknown mandatory subtlv; PARSE_IGNORE ignores the whole TLV */
if (tlv->type >= 128)
return PARSE_IGNORE;
break;
}
}
return PARSE_SUCCESS;
}
static inline int
babel_read_tlv(struct babel_tlv *hdr,
union babel_msg *msg,
struct babel_parse_state *state)
{
if ((hdr->type <= BABEL_TLV_PADN) ||
(hdr->type >= BABEL_TLV_MAX) ||
!tlv_data[hdr->type].read_tlv)
return PARSE_IGNORE;
if (TLV_LENGTH(hdr) < tlv_data[hdr->type].min_length)
return PARSE_ERROR;
state->current_tlv_endpos = tlv_data[hdr->type].min_length;
memset(msg, 0, sizeof(*msg));
int res = tlv_data[hdr->type].read_tlv(hdr, msg, state);
if (res != PARSE_SUCCESS)
return res;
return babel_read_subtlvs(hdr, msg, state);
}
static uint
babel_write_tlv(struct babel_tlv *hdr,
union babel_msg *msg,
struct babel_write_state *state,
uint max_len)
{
if ((msg->type <= BABEL_TLV_PADN) ||
(msg->type >= BABEL_TLV_MAX) ||
!tlv_data[msg->type].write_tlv)
return 0;
if (tlv_data[msg->type].min_length > max_len)
return 0;
memset(hdr, 0, tlv_data[msg->type].min_length);
return tlv_data[msg->type].write_tlv(hdr, msg, state, max_len);
}
/*
* Packet RX/TX functions
*/
static int
babel_send_to(struct babel_iface *ifa, ip_addr dest)
{
sock *sk = ifa->sk;
struct babel_pkt_header *hdr = (void *) sk->tbuf;
int len = get_u16(&hdr->length) + sizeof(struct babel_pkt_header);
DBG("Babel: Sending %d bytes to %I\n", len, dest);
return sk_send_to(sk, len, dest, 0);
}
/**
* babel_write_queue - Write a TLV queue to a transmission buffer
* @ifa: Interface holding the transmission buffer
* @queue: TLV queue to write (containing internal-format TLVs)
*
* This function writes a packet to the interface transmission buffer with as
* many TLVs from the &queue as will fit in the buffer. It returns the number of
* bytes written (NOT counting the packet header). The function is called by
* babel_send_queue() and babel_send_unicast() to construct packets for
* transmission, and uses per-TLV helper functions to convert the
* internal-format TLVs to their wire representations.
*
* The TLVs in the queue are freed after they are written to the buffer.
*/
static uint
babel_write_queue(struct babel_iface *ifa, list *queue)
{
struct babel_proto *p = ifa->proto;
struct babel_write_state state = { .next_hop_ip6 = ifa->addr };
if (EMPTY_LIST(*queue))
return 0;
byte *pos = ifa->sk->tbuf;
byte *end = pos + ifa->tx_length;
struct babel_pkt_header *pkt = (void *) pos;
pkt->magic = BABEL_MAGIC;
pkt->version = BABEL_VERSION;
pkt->length = 0;
pos += sizeof(struct babel_pkt_header);
struct babel_msg_node *msg;
WALK_LIST_FIRST(msg, *queue)
{
if (pos >= end)
break;
int len = babel_write_tlv((struct babel_tlv *) pos, &msg->msg, &state, end - pos);
if (!len)
break;
pos += len;
rem_node(NODE msg);
sl_free(p->msg_slab, msg);
}
uint plen = pos - (byte *) pkt;
put_u16(&pkt->length, plen - sizeof(struct babel_pkt_header));
return plen;
}
void
babel_send_queue(void *arg)
{
struct babel_iface *ifa = arg;
while ((babel_write_queue(ifa, &ifa->msg_queue) > 0) &&
(babel_send_to(ifa, IP6_BABEL_ROUTERS) > 0));
}
static inline void
babel_kick_queue(struct babel_iface *ifa)
{
/*
* Only schedule send event if there is not already data in the socket buffer.
* Otherwise we may overwrite the data already in the buffer.
*/
if ((ifa->sk->tpos == ifa->sk->tbuf) && !ev_active(ifa->send_event))
ev_schedule(ifa->send_event);
}
/**
* babel_send_unicast - send a single TLV via unicast to a destination
* @msg: TLV to send
* @ifa: Interface to send via
* @dest: Destination of the TLV
*
* This function is used to send a single TLV via unicast to a designated
* receiver. This is used for replying to certain incoming requests, and for
* sending unicast requests to refresh routes before they expire.
*/
void
babel_send_unicast(union babel_msg *msg, struct babel_iface *ifa, ip_addr dest)
{
struct babel_proto *p = ifa->proto;
struct babel_msg_node *msgn = sl_alloc(p->msg_slab);
list queue;
msgn->msg = *msg;
init_list(&queue);
add_tail(&queue, NODE msgn);
babel_write_queue(ifa, &queue);
babel_send_to(ifa, dest);
/* We could overwrite waiting packet here, we may have to kick TX queue */
if (!EMPTY_LIST(ifa->msg_queue))
babel_kick_queue(ifa);
}
/**
* babel_enqueue - enqueue a TLV for transmission on an interface
* @msg: TLV to enqueue (in internal TLV format)
* @ifa: Interface to enqueue to
*
* This function is called to enqueue a TLV for subsequent transmission on an
* interface. The transmission event is triggered whenever a TLV is enqueued;
* this ensures that TLVs will be transmitted in a timely manner, but that TLVs
* which are enqueued in rapid succession can be transmitted together in one
* packet.
*/
void
babel_enqueue(union babel_msg *msg, struct babel_iface *ifa)
{
struct babel_proto *p = ifa->proto;
struct babel_msg_node *msgn = sl_alloc(p->msg_slab);
msgn->msg = *msg;
add_tail(&ifa->msg_queue, NODE msgn);
babel_kick_queue(ifa);
}
/**
* babel_process_packet - process incoming data packet
* @pkt: Pointer to the packet data
* @len: Length of received packet
* @saddr: Address of packet sender
* @ifa: Interface packet was received on.
*
* This function is the main processing hook of incoming Babel packets. It
* checks that the packet header is well-formed, then processes the TLVs
* contained in the packet. This is done in two passes: First all TLVs are
* parsed into the internal TLV format. If a TLV parser fails, processing of the
* rest of the packet is aborted.
*
* After the parsing step, the TLV handlers are called for each parsed TLV in
* order.
*/
static void
babel_process_packet(struct babel_pkt_header *pkt, int len,
ip_addr saddr, struct babel_iface *ifa)
{
struct babel_proto *p = ifa->proto;
struct babel_tlv *tlv;
struct babel_msg_node *msg;
list msgs;
int res;
int plen = sizeof(struct babel_pkt_header) + get_u16(&pkt->length);
byte *pos;
byte *end = (byte *)pkt + plen;
struct babel_parse_state state = {
.proto = p,
.ifa = ifa,
.saddr = saddr,
.next_hop_ip6 = saddr,
.sadr_enabled = babel_sadr_enabled(p),
};
if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION))
{
TRACE(D_PACKETS, "Strange packet from %I via %s - magic %d version %d",
saddr, ifa->iface->name, pkt->magic, pkt->version);
return;
}
if (plen > len)
{
LOG_PKT("Bad packet from %I via %s - %s (%u)",
saddr, ifa->iface->name, "length mismatch", plen);
return;
}
TRACE(D_PACKETS, "Packet received from %I via %s",
saddr, ifa->iface->name);
init_list(&msgs);
/* First pass through the packet TLV by TLV, parsing each into internal data
structures. */
for (tlv = FIRST_TLV(pkt);
(byte *)tlv < end;
tlv = NEXT_TLV(tlv))
{
/* Ugly special case */
if (tlv->type == BABEL_TLV_PAD1)
continue;
/* The end of the common TLV header */
pos = (byte *)tlv + sizeof(struct babel_tlv);
if ((pos > end) || (pos + tlv->length > end))
{
LOG_PKT("Bad TLV from %I via %s type %d pos %d - framing error",
saddr, ifa->iface->name, tlv->type, (byte *)tlv - (byte *)pkt);
break;
}
msg = sl_alloc(p->msg_slab);
res = babel_read_tlv(tlv, &msg->msg, &state);
if (res == PARSE_SUCCESS)
{
add_tail(&msgs, NODE msg);
}
else if (res == PARSE_IGNORE)
{
DBG("Babel: Ignoring TLV of type %d\n", tlv->type);
sl_free(p->msg_slab, msg);
}
else /* PARSE_ERROR */
{
LOG_PKT("Bad TLV from %I via %s type %d pos %d - parse error",
saddr, ifa->iface->name, tlv->type, (byte *)tlv - (byte *)pkt);
sl_free(p->msg_slab, msg);
break;
}
}
/* Parsing done, handle all parsed TLVs */
WALK_LIST_FIRST(msg, msgs)
{
if (tlv_data[msg->msg.type].handle_tlv)
tlv_data[msg->msg.type].handle_tlv(&msg->msg, ifa);
rem_node(NODE msg);
sl_free(p->msg_slab, msg);
}
}
static void
babel_err_hook(sock *sk, int err)
{
struct babel_iface *ifa = sk->data;
struct babel_proto *p = ifa->proto;
log(L_ERR "%s: Socket error on %s: %M", p->p.name, ifa->iface->name, err);
/* FIXME: Drop queued TLVs here? */
}
static void
babel_tx_hook(sock *sk)
{
struct babel_iface *ifa = sk->data;
DBG("Babel: TX hook called (iface %s, src %I, dst %I)\n",
sk->iface->name, sk->saddr, sk->daddr);
babel_send_queue(ifa);
}
static int
babel_rx_hook(sock *sk, uint len)
{
struct babel_iface *ifa = sk->data;
struct babel_proto *p = ifa->proto;
const char *err_dsc = NULL;
uint err_val = 0;
if (sk->lifindex != ifa->iface->index)
return 1;
DBG("Babel: RX hook called (iface %s, src %I, dst %I)\n",
sk->iface->name, sk->faddr, sk->laddr);
/* Silently ignore my own packets */
if (ipa_equal(sk->faddr, sk->saddr))
return 1;
if (!ipa_is_link_local(sk->faddr))
DROP1("wrong src address");
if (sk->fport != ifa->cf->port)
DROP("wrong src port", sk->fport);
if (len < sizeof(struct babel_pkt_header))
DROP("too short", len);
if (sk->flags & SKF_TRUNCATED)
DROP("truncated", len);
babel_process_packet((struct babel_pkt_header *) sk->rbuf, len, sk->faddr, ifa);
return 1;
drop:
LOG_PKT("Bad packet from %I via %s - %s (%u)",
sk->faddr, sk->iface->name, err_dsc, err_val);
return 1;
}
int
babel_open_socket(struct babel_iface *ifa)
{
struct babel_proto *p = ifa->proto;
sock *sk;
sk = sk_new(ifa->pool);
sk->type = SK_UDP;
sk->sport = ifa->cf->port;
sk->dport = ifa->cf->port;
sk->iface = ifa->iface;
sk->saddr = ifa->addr;
Basic VRF support Add basic VRF (virtual routing and forwarding) support. Protocols can be associated with VRFs, such protocols will be restricted to interfaces assigned to the VRF (as reported by Linux kernel) and will use sockets bound to the VRF. E.g., different multihop BGP instances can use diffent kernel routing tables to handle BGP TCP connections. The VRF support is preliminary, currently there are several limitations: - Recent Linux kernels (4.11) do not handle correctly sockets bound to interaces that are part of VRF, so most protocols other than multihop BGP do not work. This will be fixed by future kernel versions. - Neighbor cache ignores VRFs. Breaks config with the same prefix on local interfaces in different VRFs. Not much problem as single hop protocols do not work anyways. - Olock code ignores VRFs. Breaks config with multiple BGP peers with the same IP address in different VRFs. - Incoming BGP connections are not dispatched according to VRFs. Breaks config with multiple BGP peers with the same IP address in different VRFs. Perhaps we would need some kernel API to read VRF of incoming connection? Or probably use multiple listening sockets in int-new branch. - We should handle master VRF interface up/down events and perhaps disable associated protocols when VRF goes down. Or at least disable associated interfaces. - Also we should check if the master iface is really VRF iface and not some other kind of master iface. - BFD session request dispatch should be aware of VRFs. - Perhaps kernel protocol should read default kernel table ID from VRF iface so it is not necessary to configure it. - Perhaps we should have per-VRF default table.
2017-09-06 23:38:48 +08:00
sk->vrf = p->p.vrf;
sk->rx_hook = babel_rx_hook;
sk->tx_hook = babel_tx_hook;
sk->err_hook = babel_err_hook;
sk->data = ifa;
sk->tos = ifa->cf->tx_tos;
sk->priority = ifa->cf->tx_priority;
sk->ttl = 1;
sk->flags = SKF_LADDR_RX;
if (sk_open(sk) < 0)
goto err;
if (sk_setup_multicast(sk) < 0)
goto err;
if (sk_join_group(sk, IP6_BABEL_ROUTERS) < 0)
goto err;
ifa->sk = sk;
return 1;
err:
sk_log_error(sk, p->p.name);
rfree(sk);
return 0;
}