Basic VRF support
Add basic VRF (virtual routing and forwarding) support. Protocols can be associated with VRFs; such protocols will be restricted to interfaces assigned to the VRF (as reported by the Linux kernel) and will use sockets bound to the VRF. E.g., different multihop BGP instances can use different kernel routing tables to handle BGP TCP connections. The VRF support is preliminary; currently there are several limitations: - Recent Linux kernels (4.11) do not correctly handle sockets bound to interfaces that are part of a VRF, so most protocols other than multihop BGP do not work. This will be fixed by future kernel versions. - Neighbor cache ignores VRFs. Breaks config with the same prefix on local interfaces in different VRFs. Not much of a problem, as single hop protocols do not work anyway. - Olock code ignores VRFs. Breaks config with multiple BGP peers with the same IP address in different VRFs. - Incoming BGP connections are not dispatched according to VRFs. Breaks config with multiple BGP peers with the same IP address in different VRFs. Perhaps we would need some kernel API to read the VRF of an incoming connection? Or probably use multiple listening sockets in the int-new branch. - We should handle master VRF interface up/down events and perhaps disable associated protocols when the VRF goes down. Or at least disable associated interfaces. - Also we should check if the master iface is really a VRF iface and not some other kind of master iface. - BFD session request dispatch should be aware of VRFs. - Perhaps the kernel protocol should read the default kernel table ID from the VRF iface so it is not necessary to configure it. - Perhaps we should have a per-VRF default table.
This commit is contained in:
parent
98bb80a243
commit
943478b00f
14 changed files with 81 additions and 8 deletions
|
@ -598,6 +598,15 @@ agreement").
|
|||
|
||||
<tag><label id="proto-table">table <m/name/</tag>
|
||||
Connect this protocol to a non-default routing table.
|
||||
|
||||
<tag><label id="proto-vrf">vrf "<m/text/"</tag>
|
||||
Associate the protocol with specific VRF. The protocol will be
|
||||
restricted to interfaces assigned to the VRF and will use sockets bound
|
||||
to the VRF. Appropriate VRF interface must exist on OS level. For kernel
|
||||
protocol, an appropriate table still must be explicitly selected by
|
||||
<cf/table/ option. Note that the VRF support in BIRD and Linux kernel
|
||||
(4.11) is still in development and is currently problematic outside of
|
||||
multihop BGP.
|
||||
</descrip>
|
||||
|
||||
<p>There are several options that give sense only with certain protocols:
|
||||
|
|
|
@ -26,6 +26,7 @@ typedef struct birdsock {
|
|||
int ttl; /* Time To Live, -1 = default */
|
||||
u32 flags;
|
||||
struct iface *iface; /* Interface; specify this for broad/multicast sockets */
|
||||
struct iface *vrf; /* Related VRF instance, NULL if global */
|
||||
|
||||
byte *rbuf, *rpos; /* NULL=allocate automatically */
|
||||
uint fast_rx; /* RX has higher priority in event loop */
|
||||
|
|
|
@ -55,7 +55,7 @@ get_passwords(void)
|
|||
CF_DECLS
|
||||
|
||||
CF_KEYWORDS(ROUTER, ID, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT)
|
||||
CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, TABLE, STATES, ROUTES, FILTERS)
|
||||
CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, TABLE, STATES, ROUTES, FILTERS)
|
||||
CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED)
|
||||
CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES)
|
||||
CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512)
|
||||
|
@ -227,6 +227,7 @@ proto_item:
|
|||
| IMPORT LIMIT limit_spec { this_proto->in_limit = $3; }
|
||||
| EXPORT LIMIT limit_spec { this_proto->out_limit = $3; }
|
||||
| IMPORT KEEP FILTERED bool { this_proto->in_keep_filtered = $4; }
|
||||
| VRF TEXT { this_proto->vrf = if_get_by_name($2); }
|
||||
| TABLE rtable { this_proto->table = $2; }
|
||||
| ROUTER ID idval { this_proto->router_id = $3; }
|
||||
| DESCRIPTION text { this_proto->dsc = $2; }
|
||||
|
|
19
nest/iface.c
19
nest/iface.c
|
@ -116,7 +116,7 @@ if_what_changed(struct iface *i, struct iface *j)
|
|||
unsigned c;
|
||||
|
||||
if (((i->flags ^ j->flags) & ~(IF_UP | IF_SHUTDOWN | IF_UPDATED | IF_ADMIN_UP | IF_LINK_UP | IF_TMP_DOWN | IF_JUST_CREATED))
|
||||
|| i->index != j->index)
|
||||
|| (i->index != j->index) || (i->master != j->master))
|
||||
return IF_CHANGE_TOO_MUCH;
|
||||
c = 0;
|
||||
if ((i->flags ^ j->flags) & IF_UP)
|
||||
|
@ -133,12 +133,14 @@ if_copy(struct iface *to, struct iface *from)
|
|||
{
|
||||
to->flags = from->flags | (to->flags & IF_TMP_DOWN);
|
||||
to->mtu = from->mtu;
|
||||
to->master_index = from->master_index;
|
||||
to->master = from->master;
|
||||
}
|
||||
|
||||
static inline void
|
||||
ifa_send_notify(struct proto *p, unsigned c, struct ifa *a)
|
||||
{
|
||||
if (p->ifa_notify)
|
||||
if (p->ifa_notify && (!p->vrf || p->vrf == a->iface->master))
|
||||
{
|
||||
if (p->debug & D_IFACES)
|
||||
log(L_TRACE "%s <%s address %I/%d on interface %s %s",
|
||||
|
@ -175,7 +177,7 @@ ifa_notify_change(unsigned c, struct ifa *a)
|
|||
static inline void
|
||||
if_send_notify(struct proto *p, unsigned c, struct iface *i)
|
||||
{
|
||||
if (p->if_notify)
|
||||
if (p->if_notify && (!p->vrf || p->vrf == i->master))
|
||||
{
|
||||
if (p->debug & D_IFACES)
|
||||
log(L_TRACE "%s < interface %s %s", p->name, i->name,
|
||||
|
@ -238,7 +240,8 @@ if_recalc_flags(struct iface *i, unsigned flags)
|
|||
{
|
||||
if ((flags & (IF_SHUTDOWN | IF_TMP_DOWN)) ||
|
||||
!(flags & IF_ADMIN_UP) ||
|
||||
!i->addr)
|
||||
!i->addr ||
|
||||
(i->master_index && !i->master))
|
||||
flags &= ~IF_UP;
|
||||
else
|
||||
flags |= IF_UP;
|
||||
|
@ -771,7 +774,13 @@ if_show(void)
|
|||
if (i->flags & IF_SHUTDOWN)
|
||||
continue;
|
||||
|
||||
cli_msg(-1001, "%s %s (index=%d)", i->name, (i->flags & IF_UP) ? "up" : "DOWN", i->index);
|
||||
char mbuf[16 + sizeof(i->name)] = {};
|
||||
if (i->master)
|
||||
bsprintf(mbuf, " master=%s", i->master->name);
|
||||
else if (i->master_index)
|
||||
bsprintf(mbuf, " master=#%u", i->master_index);
|
||||
|
||||
cli_msg(-1001, "%s %s (index=%d%s)", i->name, (i->flags & IF_UP) ? "up" : "DOWN", i->index, mbuf);
|
||||
if (!(i->flags & IF_MULTIACCESS))
|
||||
type = "PtP";
|
||||
else
|
||||
|
|
|
@ -34,8 +34,10 @@ struct iface {
|
|||
unsigned flags;
|
||||
unsigned mtu;
|
||||
unsigned index; /* OS-dependent interface index */
|
||||
unsigned master_index; /* Interface index of master iface */
|
||||
list addrs; /* Addresses assigned to this interface */
|
||||
struct ifa *addr; /* Primary address */
|
||||
struct iface *master; /* Master iface (e.g. for VRF) */
|
||||
list neighbors; /* All neighbors on this interface */
|
||||
};
|
||||
|
||||
|
|
|
@ -386,6 +386,7 @@ proto_init(struct proto_config *c)
|
|||
q->core_state = FS_HUNGRY;
|
||||
q->export_state = ES_DOWN;
|
||||
q->last_state_change = now;
|
||||
q->vrf = c->vrf;
|
||||
|
||||
add_tail(&initial_proto_list, &q->n);
|
||||
|
||||
|
@ -409,6 +410,7 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config
|
|||
/* If there is a too big change in core attributes, ... */
|
||||
if ((nc->protocol != oc->protocol) ||
|
||||
(nc->disabled != p->disabled) ||
|
||||
(nc->vrf != oc->vrf) ||
|
||||
(nc->table->table != oc->table->table))
|
||||
return 0;
|
||||
|
||||
|
@ -1474,7 +1476,9 @@ proto_show_limit(struct proto_limit *l, const char *dsc)
|
|||
void
|
||||
proto_show_basic_info(struct proto *p)
|
||||
{
|
||||
// cli_msg(-1006, " Table: %s", p->table->name);
|
||||
if (p->vrf)
|
||||
cli_msg(-1006, " VRF: %s", p->vrf->name);
|
||||
|
||||
cli_msg(-1006, " Preference: %d", p->preference);
|
||||
cli_msg(-1006, " Input filter: %s", filter_name(p->cf->in_filter));
|
||||
cli_msg(-1006, " Output filter: %s", filter_name(p->cf->out_filter));
|
||||
|
|
|
@ -94,6 +94,7 @@ struct proto_config {
|
|||
unsigned preference, disabled; /* Generic parameters */
|
||||
int in_keep_filtered; /* Routes rejected in import filter are kept */
|
||||
u32 router_id; /* Protocol specific router ID */
|
||||
struct iface *vrf; /* Related VRF instance, NULL if global */
|
||||
struct rtable_config *table; /* Table we're attached to */
|
||||
struct filter *in_filter, *out_filter; /* Attached filters */
|
||||
struct proto_limit *rx_limit; /* Limit for receiving routes from protocol
|
||||
|
@ -213,6 +214,7 @@ struct proto {
|
|||
void (*rte_insert)(struct network *, struct rte *);
|
||||
void (*rte_remove)(struct network *, struct rte *);
|
||||
|
||||
struct iface *vrf; /* Related VRF instance, NULL if global */
|
||||
struct rtable *table; /* Our primary routing table */
|
||||
struct rte_src *main_source; /* Primary route source */
|
||||
struct announce_hook *main_ahook; /* Primary announcement hook */
|
||||
|
|
|
@ -1080,6 +1080,7 @@ babel_open_socket(struct babel_iface *ifa)
|
|||
sk->sport = ifa->cf->port;
|
||||
sk->dport = ifa->cf->port;
|
||||
sk->iface = ifa->iface;
|
||||
sk->vrf = p->p.vrf;
|
||||
|
||||
sk->rx_hook = babel_rx_hook;
|
||||
sk->tx_hook = babel_tx_hook;
|
||||
|
|
|
@ -745,6 +745,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
|
|||
s->daddr = p->cf->remote_ip;
|
||||
s->dport = p->cf->remote_port;
|
||||
s->iface = p->neigh ? p->neigh->iface : NULL;
|
||||
s->vrf = p->p.vrf;
|
||||
s->ttl = p->cf->ttl_security ? 255 : hops;
|
||||
s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
|
||||
s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
|
||||
|
|
|
@ -118,6 +118,7 @@ ospf_sk_open(struct ospf_iface *ifa)
|
|||
sk->dport = OSPF_PROTO;
|
||||
sk->saddr = ifa->addr->ip;
|
||||
sk->iface = ifa->iface;
|
||||
sk->vrf = p->p.vrf;
|
||||
|
||||
sk->tos = ifa->cf->tx_tos;
|
||||
sk->priority = ifa->cf->tx_priority;
|
||||
|
@ -200,6 +201,7 @@ ospf_open_vlink_sk(struct ospf_proto *p)
|
|||
sock *sk = sk_new(p->p.pool);
|
||||
sk->type = SK_IP;
|
||||
sk->dport = OSPF_PROTO;
|
||||
sk->vrf = p->p.vrf;
|
||||
|
||||
/* FIXME: configurable tos/priority ? */
|
||||
sk->tos = IP_PREC_INTERNET_CONTROL;
|
||||
|
|
|
@ -388,6 +388,7 @@ radv_sk_open(struct radv_iface *ifa)
|
|||
sk->type = SK_IP;
|
||||
sk->dport = ICMPV6_PROTO;
|
||||
sk->saddr = ifa->addr->ip;
|
||||
sk->vrf = ifa->ra->p.vrf;
|
||||
|
||||
sk->ttl = 255; /* Mandatory for Neighbor Discovery packets */
|
||||
sk->rx_hook = radv_rx_hook;
|
||||
|
|
|
@ -739,6 +739,7 @@ rip_open_socket(struct rip_iface *ifa)
|
|||
sk->sport = ifa->cf->port;
|
||||
sk->dport = ifa->cf->port;
|
||||
sk->iface = ifa->iface;
|
||||
sk->vrf = p->p.vrf;
|
||||
|
||||
/*
|
||||
* For RIPv2, we explicitly choose a primary address, mainly to ensure that
|
||||
|
|
|
@ -300,6 +300,7 @@ struct nl_want_attrs {
|
|||
static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
|
||||
[IFLA_IFNAME] = { 1, 0, 0 },
|
||||
[IFLA_MTU] = { 1, 1, sizeof(u32) },
|
||||
[IFLA_MASTER] = { 1, 1, sizeof(u32) },
|
||||
[IFLA_WIRELESS] = { 1, 0, 0 },
|
||||
};
|
||||
|
||||
|
@ -618,7 +619,7 @@ nl_parse_link(struct nlmsghdr *h, int scan)
|
|||
struct iface f = {};
|
||||
struct iface *ifi;
|
||||
char *name;
|
||||
u32 mtu;
|
||||
u32 mtu, master = 0;
|
||||
uint fl;
|
||||
|
||||
if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
|
||||
|
@ -641,6 +642,9 @@ nl_parse_link(struct nlmsghdr *h, int scan)
|
|||
name = RTA_DATA(a[IFLA_IFNAME]);
|
||||
mtu = rta_get_u32(a[IFLA_MTU]);
|
||||
|
||||
if (a[IFLA_MASTER])
|
||||
master = rta_get_u32(a[IFLA_MASTER]);
|
||||
|
||||
ifi = if_find_by_index(i->ifi_index);
|
||||
if (!new)
|
||||
{
|
||||
|
@ -660,6 +664,9 @@ nl_parse_link(struct nlmsghdr *h, int scan)
|
|||
f.index = i->ifi_index;
|
||||
f.mtu = mtu;
|
||||
|
||||
f.master_index = master;
|
||||
f.master = if_find_by_index(master);
|
||||
|
||||
fl = i->ifi_flags;
|
||||
if (fl & IFF_UP)
|
||||
f.flags |= IF_ADMIN_UP;
|
||||
|
@ -835,6 +842,26 @@ kif_do_scan(struct kif_proto *p UNUSED)
|
|||
else
|
||||
log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
|
||||
|
||||
/* Re-resolve master interface for slaves */
|
||||
struct iface *i;
|
||||
WALK_LIST(i, iface_list)
|
||||
if (i->master_index)
|
||||
{
|
||||
struct iface f = {
|
||||
.flags = i->flags,
|
||||
.mtu = i->mtu,
|
||||
.index = i->index,
|
||||
.master_index = i->master_index,
|
||||
.master = if_find_by_index(i->master_index)
|
||||
};
|
||||
|
||||
if (f.master != i->master)
|
||||
{
|
||||
memcpy(f.name, i->name, sizeof(f.name));
|
||||
if_update(&f);
|
||||
}
|
||||
}
|
||||
|
||||
nl_request_dump(BIRD_AF, RTM_GETADDR);
|
||||
while (h = nl_get_scan())
|
||||
if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
|
||||
|
|
|
@ -1211,6 +1211,18 @@ sk_setup(sock *s)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (s->vrf && !s->iface)
|
||||
{
|
||||
/* Bind socket to associated VRF interface.
|
||||
This is Linux-specific, but so is SO_BINDTODEVICE. */
|
||||
#ifdef SO_BINDTODEVICE
|
||||
struct ifreq ifr = {};
|
||||
strcpy(ifr.ifr_name, s->vrf->name);
|
||||
if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
|
||||
ERR("SO_BINDTODEVICE");
|
||||
#endif
|
||||
}
|
||||
|
||||
if (s->iface)
|
||||
{
|
||||
#ifdef SO_BINDTODEVICE
|
||||
|
|
Loading…
Reference in a new issue