bird/proto/radv/packets.c

518 lines
11 KiB
C
Raw Permalink Normal View History

/*
* BIRD -- RAdv Packet Processing
*
* (c) 2011--2019 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2011--2019 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#include <stdlib.h>
#include "radv.h"
struct radv_ra_packet
{
u8 type;
u8 code;
u16 checksum;
u8 current_hop_limit;
u8 flags;
u16 router_lifetime;
u32 reachable_time;
u32 retrans_timer;
};
#define OPT_RA_MANAGED 0x80
#define OPT_RA_OTHER_CFG 0x40
#define OPT_PREFIX 3
#define OPT_MTU 5
#define OPT_ROUTE 24
#define OPT_RDNSS 25
#define OPT_DNSSL 31
struct radv_opt_prefix
{
u8 type;
u8 length;
u8 pxlen;
u8 flags;
u32 valid_lifetime;
u32 preferred_lifetime;
u32 reserved;
ip6_addr prefix;
};
#define OPT_PX_ONLINK 0x80
#define OPT_PX_AUTONOMOUS 0x40
struct radv_opt_mtu
{
u8 type;
u8 length;
u16 reserved;
u32 mtu;
};
struct radv_opt_route {
u8 type;
u8 length;
u8 pxlen;
u8 flags;
u32 lifetime;
u8 prefix[];
};
struct radv_opt_rdnss
{
u8 type;
u8 length;
u16 reserved;
u32 lifetime;
ip6_addr servers[];
};
struct radv_opt_dnssl
{
u8 type;
u8 length;
u16 reserved;
u32 lifetime;
char domain[];
};
static int
radv_prepare_route(struct radv_iface *ifa, struct radv_route *rt,
char **buf, char *bufend)
{
struct radv_proto *p = ifa->ra;
u8 px_blocks = (net6_pxlen(rt->n.addr) + 63) / 64;
u8 opt_len = 8 * (1 + px_blocks);
if (*buf + opt_len > bufend)
{
log(L_WARN, "%s: Too many RA options on interface %s",
p->p.name, ifa->iface->name);
return -1;
}
uint preference = rt->preference_set ? rt->preference : ifa->cf->route_preference;
uint lifetime = rt->lifetime_set ? rt->lifetime : ifa->cf->route_lifetime;
uint valid = rt->valid && p->valid && (p->active || !ifa->cf->route_lifetime_sensitive);
struct radv_opt_route *opt = (void *) *buf;
*buf += opt_len;
opt->type = OPT_ROUTE;
opt->length = 1 + px_blocks;
opt->pxlen = net6_pxlen(rt->n.addr);
opt->flags = preference;
opt->lifetime = valid ? htonl(lifetime) : 0;
/* Copy the relevant part of the prefix */
ip6_addr px_addr = ip6_hton(net6_prefix(rt->n.addr));
memcpy(opt->prefix, &px_addr, 8 * px_blocks);
/* Keeping track of first linger timeout */
if (!rt->valid)
ifa->valid_time = MIN(ifa->valid_time, rt->changed + ifa->cf->route_linger_time S);
return 0;
}
static int
radv_prepare_rdnss(struct radv_iface *ifa, list *rdnss_list, char **buf, char *bufend)
{
struct radv_rdnss_config *rcf = HEAD(*rdnss_list);
while(NODE_VALID(rcf))
{
struct radv_rdnss_config *rcf_base = rcf;
struct radv_opt_rdnss *op = (void *) *buf;
int max_i = (bufend - *buf - sizeof(struct radv_opt_rdnss)) / sizeof(ip6_addr);
int i = 0;
if (max_i < 1)
goto too_much;
op->type = OPT_RDNSS;
op->reserved = 0;
if (rcf->lifetime_mult)
op->lifetime = htonl(rcf->lifetime_mult * ifa->cf->max_ra_int);
else
op->lifetime = htonl(rcf->lifetime);
while(NODE_VALID(rcf) &&
(rcf->lifetime == rcf_base->lifetime) &&
(rcf->lifetime_mult == rcf_base->lifetime_mult))
{
if (i >= max_i)
goto too_much;
op->servers[i] = ip6_hton(rcf->server);
i++;
rcf = NODE_NEXT(rcf);
}
op->length = 1+2*i;
*buf += 8 * op->length;
}
return 0;
too_much:
log(L_WARN "%s: Too many RA options on interface %s",
ifa->ra->p.name, ifa->iface->name);
return -1;
}
int
radv_process_domain(struct radv_dnssl_config *cf)
{
/* Format of domain in search list is <size> <label> <size> <label> ... 0 */
const char *dom = cf->domain;
const char *dom_end = dom; /* Just to */
u8 *dlen_save = &cf->dlen_first;
uint len;
while (dom_end)
{
dom_end = strchr(dom, '.');
len = dom_end ? (uint)(dom_end - dom) : strlen(dom);
if (len < 1 || len > 63)
return -1;
*dlen_save = len;
dlen_save = (u8 *) dom_end;
dom += len + 1;
}
len = dom - cf->domain;
if (len > 254)
return -1;
cf->dlen_all = len;
return 0;
}
static int
radv_prepare_dnssl(struct radv_iface *ifa, list *dnssl_list, char **buf, char *bufend)
{
struct radv_dnssl_config *dcf = HEAD(*dnssl_list);
while(NODE_VALID(dcf))
{
struct radv_dnssl_config *dcf_base = dcf;
struct radv_opt_dnssl *op = (void *) *buf;
int bsize = bufend - *buf - sizeof(struct radv_opt_dnssl);
int bpos = 0;
if (bsize < 0)
goto too_much;
bsize = bsize & ~7; /* Round down to multiples of 8 */
op->type = OPT_DNSSL;
op->reserved = 0;
if (dcf->lifetime_mult)
op->lifetime = htonl(dcf->lifetime_mult * ifa->cf->max_ra_int);
else
op->lifetime = htonl(dcf->lifetime);
while(NODE_VALID(dcf) &&
(dcf->lifetime == dcf_base->lifetime) &&
(dcf->lifetime_mult == dcf_base->lifetime_mult))
{
if (bpos + dcf->dlen_all + 1 > bsize)
goto too_much;
op->domain[bpos++] = dcf->dlen_first;
memcpy(op->domain + bpos, dcf->domain, dcf->dlen_all);
bpos += dcf->dlen_all;
dcf = NODE_NEXT(dcf);
}
int blen = (bpos + 7) / 8;
bzero(op->domain + bpos, 8 * blen - bpos);
op->length = 1 + blen;
*buf += 8 * op->length;
}
return 0;
too_much:
log(L_WARN "%s: Too many RA options on interface %s",
ifa->ra->p.name, ifa->iface->name);
return -1;
}
static int
radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *px,
char **buf, char *bufend)
{
struct radv_prefix_config *pc = px->cf;
2017-08-30 01:17:35 +08:00
if (*buf + sizeof(struct radv_opt_prefix) > bufend)
{
2017-08-30 01:17:35 +08:00
log(L_WARN "%s: Too many prefixes on interface %s",
ifa->ra->p.name, ifa->iface->name);
return -1;
}
2017-08-30 01:17:35 +08:00
struct radv_opt_prefix *op = (void *) *buf;
op->type = OPT_PREFIX;
op->length = 4;
op->pxlen = px->prefix.pxlen;
op->flags = (pc->onlink ? OPT_PX_ONLINK : 0) |
(pc->autonomous ? OPT_PX_AUTONOMOUS : 0);
op->valid_lifetime = (ifa->ra->active || !pc->valid_lifetime_sensitive) ?
htonl(pc->valid_lifetime) : 0;
op->preferred_lifetime = (ifa->ra->active || !pc->preferred_lifetime_sensitive) ?
htonl(pc->preferred_lifetime) : 0;
op->reserved = 0;
op->prefix = ip6_hton(px->prefix.prefix);
*buf += sizeof(*op);
/* Keeping track of first linger timeout */
if (!px->valid)
ifa->valid_time = MIN(ifa->valid_time, px->changed + ifa->cf->prefix_linger_time S);
return 0;
}
static void
radv_prepare_ra(struct radv_iface *ifa)
{
struct radv_proto *p = ifa->ra;
struct radv_config *cf = (struct radv_config *) (p->p.cf);
struct radv_iface_config *ic = ifa->cf;
btime now = current_time();
char *buf = ifa->sk->tbuf;
char *bufstart = buf;
char *bufend = buf + ifa->sk->tbsize;
struct radv_ra_packet *pkt = (void *) buf;
pkt->type = ICMPV6_RA;
pkt->code = 0;
pkt->checksum = 0;
pkt->current_hop_limit = ic->current_hop_limit;
pkt->router_lifetime = (p->valid && (p->active || !ic->default_lifetime_sensitive)) ?
htons(ic->default_lifetime) : 0;
pkt->flags = (ic->managed ? OPT_RA_MANAGED : 0) |
(ic->other_config ? OPT_RA_OTHER_CFG : 0) |
(pkt->router_lifetime ? ic->default_preference : 0);
pkt->reachable_time = htonl(ic->reachable_time);
pkt->retrans_timer = htonl(ic->retrans_timer);
buf += sizeof(*pkt);
if (ic->link_mtu)
{
struct radv_opt_mtu *om = (void *) buf;
om->type = OPT_MTU;
om->length = 1;
om->reserved = 0;
om->mtu = htonl(ic->link_mtu);
buf += sizeof (*om);
}
/* Keeping track of first linger timeout */
ifa->valid_time = TIME_INFINITY;
struct radv_prefix *px;
WALK_LIST(px, ifa->prefixes)
{
/* Skip invalid prefixes that are past linger timeout but still not pruned */
if (!px->valid && ((px->changed + ic->prefix_linger_time S) <= now))
continue;
if (radv_prepare_prefix(ifa, px, &buf, bufend) < 0)
goto done;
}
if (! ic->rdnss_local)
if (radv_prepare_rdnss(ifa, &cf->rdnss_list, &buf, bufend) < 0)
goto done;
if (radv_prepare_rdnss(ifa, &ic->rdnss_list, &buf, bufend) < 0)
goto done;
if (! ic->dnssl_local)
if (radv_prepare_dnssl(ifa, &cf->dnssl_list, &buf, bufend) < 0)
goto done;
if (radv_prepare_dnssl(ifa, &ic->dnssl_list, &buf, bufend) < 0)
goto done;
if (p->fib_up)
{
FIB_WALK(&p->routes, struct radv_route, rt)
{
/* Skip invalid routes that are past linger timeout but still not pruned */
if (!rt->valid && ((rt->changed + ic->route_linger_time S) <= now))
continue;
if (radv_prepare_route(ifa, rt, &buf, bufend) < 0)
goto done;
}
FIB_WALK_END;
}
done:
ifa->plen = buf - bufstart;
}
void
radv_send_ra(struct radv_iface *ifa, ip_addr to)
{
struct radv_proto *p = ifa->ra;
/* TX queue is already full */
if (!sk_tx_buffer_empty(ifa->sk))
return;
if (ifa->valid_time <= current_time())
radv_invalidate(ifa);
/* We store prepared RA in tbuf */
if (!ifa->plen)
radv_prepare_ra(ifa);
if (ipa_zero(to))
{
to = IP6_ALL_NODES;
RADV_TRACE(D_PACKETS, "Sending RA via %s", ifa->iface->name);
}
else
{
RADV_TRACE(D_PACKETS, "Sending RA to %I via %s", to, ifa->iface->name);
}
int done = sk_send_to(ifa->sk, ifa->plen, to, 0);
if (!done)
log(L_WARN "%s: TX queue full on %s", p->p.name, ifa->iface->name);
}
static void
radv_receive_rs(struct radv_proto *p, struct radv_iface *ifa, ip_addr from)
{
RADV_TRACE(D_PACKETS, "Received RS from %I via %s",
from, ifa->iface->name);
if (ifa->cf->solicited_ra_unicast && ipa_nonzero(from))
radv_send_ra(ifa, from);
else
radv_iface_notify(ifa, RA_EV_RS);
}
static int
radv_rx_hook(sock *sk, uint size)
{
struct radv_iface *ifa = sk->data;
struct radv_proto *p = ifa->ra;
/* We want just packets from sk->iface */
if (sk->lifindex != sk->iface->index)
return 1;
if (ipa_equal(sk->faddr, sk->saddr))
return 1;
if (size < 8)
return 1;
byte *buf = sk->rbuf;
if (buf[1] != 0)
return 1;
/* Validation is a bit sloppy - Hop Limit is not checked and
length of options is ignored for RS and left to later for RA */
switch (buf[0])
{
case ICMPV6_RS:
radv_receive_rs(p, ifa, sk->faddr);
return 1;
case ICMPV6_RA:
RADV_TRACE(D_PACKETS, "Received RA from %I via %s",
sk->faddr, ifa->iface->name);
/* FIXME - there should be some checking of received RAs, but we just ignore them */
return 1;
default:
return 1;
}
}
static void
radv_tx_hook(sock *sk)
{
struct radv_iface *ifa = sk->data;
log(L_INFO "%s: TX queue ready on %s", ifa->ra->p.name, ifa->iface->name);
/* Some RAs may be missed due to full TX queue */
radv_iface_notify(ifa, RA_EV_RS);
}
static void
radv_err_hook(sock *sk, int err)
{
struct radv_iface *ifa = sk->data;
log(L_ERR "%s: Socket error on %s: %M", ifa->ra->p.name, ifa->iface->name, err);
}
int
radv_sk_open(struct radv_iface *ifa)
{
sock *sk = sk_new(ifa->pool);
sk->type = SK_IP;
sk->subtype = SK_IPV6;
sk->dport = ICMPV6_PROTO;
sk->saddr = ifa->addr->ip;
Basic VRF support Add basic VRF (virtual routing and forwarding) support. Protocols can be associated with VRFs, such protocols will be restricted to interfaces assigned to the VRF (as reported by Linux kernel) and will use sockets bound to the VRF. E.g., different multihop BGP instances can use diffent kernel routing tables to handle BGP TCP connections. The VRF support is preliminary, currently there are several limitations: - Recent Linux kernels (4.11) do not handle correctly sockets bound to interaces that are part of VRF, so most protocols other than multihop BGP do not work. This will be fixed by future kernel versions. - Neighbor cache ignores VRFs. Breaks config with the same prefix on local interfaces in different VRFs. Not much problem as single hop protocols do not work anyways. - Olock code ignores VRFs. Breaks config with multiple BGP peers with the same IP address in different VRFs. - Incoming BGP connections are not dispatched according to VRFs. Breaks config with multiple BGP peers with the same IP address in different VRFs. Perhaps we would need some kernel API to read VRF of incoming connection? Or probably use multiple listening sockets in int-new branch. - We should handle master VRF interface up/down events and perhaps disable associated protocols when VRF goes down. Or at least disable associated interfaces. - Also we should check if the master iface is really VRF iface and not some other kind of master iface. - BFD session request dispatch should be aware of VRFs. - Perhaps kernel protocol should read default kernel table ID from VRF iface so it is not necessary to configure it. - Perhaps we should have per-VRF default table.
2017-09-06 23:38:48 +08:00
sk->vrf = ifa->ra->p.vrf;
sk->ttl = 255; /* Mandatory for Neighbor Discovery packets */
sk->rx_hook = radv_rx_hook;
sk->tx_hook = radv_tx_hook;
sk->err_hook = radv_err_hook;
sk->iface = ifa->iface;
sk->rbsize = 1024; // bufsize(ifa);
sk->tbsize = 1024; // bufsize(ifa);
sk->data = ifa;
sk->flags = SKF_LADDR_RX;
2014-05-18 17:42:26 +08:00
if (sk_open(sk) < 0)
goto err;
/* We want listen just to ICMPv6 messages of type RS and RA */
2014-05-18 17:42:26 +08:00
if (sk_set_icmp6_filter(sk, ICMPV6_RS, ICMPV6_RA) < 0)
goto err;
if (sk_setup_multicast(sk) < 0)
goto err;
2014-10-24 17:11:43 +08:00
if (sk_join_group(sk, IP6_ALL_ROUTERS) < 0)
goto err;
ifa->sk = sk;
return 1;
err:
2014-05-18 17:42:26 +08:00
sk_log_error(sk, ifa->ra->p.name);
rfree(sk);
return 0;
}