KRT: Fix IPv6 ECMP handling with Linux 4.11+
Starting with Linux 4.11, IPv6 ECMP routes are announced using RTA_MULTIPATH, like IPv4 ones. This patch adds support for parsing RTA_MULTIPATH in IPv6 routes, which also makes it possible to parse alien ECMP routes correctly. Thanks to Vincent Bernat for the original patch.
This commit is contained in:
parent
9befc7cc4f
commit
98bb80a243
1 changed file with 42 additions and 13 deletions
|
@ -59,22 +59,26 @@
|
||||||
/*
|
/*
|
||||||
* Structure nl_parse_state keeps state of received route processing. Ideally,
|
* Structure nl_parse_state keeps state of received route processing. Ideally,
|
||||||
* we could just independently parse received Netlink messages and immediately
|
* we could just independently parse received Netlink messages and immediately
|
||||||
* propagate received routes to the rest of BIRD, but Linux kernel represents
|
* propagate received routes to the rest of BIRD, but older Linux kernels (before
|
||||||
* and announces IPv6 ECMP routes not as one route with multiple next hops (like
|
* version 4.11) represent and announce IPv6 ECMP routes not as one route with
|
||||||
* RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix.
|
* multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
|
||||||
|
* routes with the same prefix. More recent kernels work as with IPv4.
|
||||||
*
|
*
|
||||||
* Therefore, BIRD keeps currently processed route in nl_parse_state structure
|
* Therefore, BIRD keeps currently processed route in nl_parse_state structure
|
||||||
* and postpones its propagation until we expect it to be final; i.e., when
|
* and postpones its propagation until we expect it to be final; i.e., when
|
||||||
* non-matching route is received or when the scan ends. When another matching
|
* non-matching route is received or when the scan ends. When another matching
|
||||||
* route is received, it is merged with the already processed route to form an
|
* route is received, it is merged with the already processed route to form an
|
||||||
* ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
|
* ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
|
||||||
* postponing is done in both cases (for simplicity). All IPv4 routes are just
|
* postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
|
||||||
* considered non-matching.
|
* routes with RTA_MULTIPATH set are just considered non-matching.
|
||||||
*
|
*
|
||||||
* This is ignored for asynchronous notifications (every notification is handled
|
* This is ignored for asynchronous notifications (every notification is handled
|
||||||
* as a separate route). It is not an issue for our routes, as we ignore such
|
* as a separate route). It is not an issue for our routes, as we ignore such
|
||||||
* notifications anyways. But importing alien IPv6 ECMP routes does not work
|
* notifications anyways. But importing alien IPv6 ECMP routes does not work
|
||||||
* properly.
|
* properly with older kernels.
|
||||||
|
*
|
||||||
|
* Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
|
||||||
|
* for the same prefix.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct nl_parse_state
|
struct nl_parse_state
|
||||||
|
@ -320,9 +324,15 @@ static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
|
||||||
|
|
||||||
#define BIRD_RTA_MAX (RTA_TABLE+1)
|
#define BIRD_RTA_MAX (RTA_TABLE+1)
|
||||||
|
|
||||||
|
#ifndef IPV6
|
||||||
static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
|
static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
|
||||||
[RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
|
[RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
|
||||||
};
|
};
|
||||||
|
#else
|
||||||
|
static struct nl_want_attrs mpnh_attr_want6[BIRD_RTA_MAX] = {
|
||||||
|
[RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef IPV6
|
#ifndef IPV6
|
||||||
static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
|
static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
|
||||||
|
@ -345,6 +355,7 @@ static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
|
||||||
[RTA_PRIORITY] = { 1, 1, sizeof(u32) },
|
[RTA_PRIORITY] = { 1, 1, sizeof(u32) },
|
||||||
[RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
|
[RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
|
||||||
[RTA_METRICS] = { 1, 0, 0 },
|
[RTA_METRICS] = { 1, 0, 0 },
|
||||||
|
[RTA_MULTIPATH] = { 1, 0, 0 },
|
||||||
[RTA_FLOW] = { 1, 1, sizeof(u32) },
|
[RTA_FLOW] = { 1, 1, sizeof(u32) },
|
||||||
[RTA_TABLE] = { 1, 1, sizeof(u32) },
|
[RTA_TABLE] = { 1, 1, sizeof(u32) },
|
||||||
};
|
};
|
||||||
|
@ -477,7 +488,7 @@ nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct mpnh *
|
static struct mpnh *
|
||||||
nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
|
nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af)
|
||||||
{
|
{
|
||||||
/* Temporary buffer for multipath nexthops */
|
/* Temporary buffer for multipath nexthops */
|
||||||
static struct mpnh *nh_buffer;
|
static struct mpnh *nh_buffer;
|
||||||
|
@ -515,10 +526,26 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
|
||||||
|
|
||||||
/* Nonexistent RTNH_PAYLOAD ?? */
|
/* Nonexistent RTNH_PAYLOAD ?? */
|
||||||
nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
|
nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
|
||||||
nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a));
|
switch (af)
|
||||||
|
{
|
||||||
|
#ifndef IPV6
|
||||||
|
case AF_INET:
|
||||||
|
if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a)))
|
||||||
|
return NULL;
|
||||||
|
break;
|
||||||
|
#else
|
||||||
|
case AF_INET6:
|
||||||
|
if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want6, a, sizeof(a)))
|
||||||
|
return NULL;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if (a[RTA_GATEWAY])
|
if (a[RTA_GATEWAY])
|
||||||
{
|
{
|
||||||
memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ip_addr));
|
memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(rv->gw));
|
||||||
ipa_ntoh(rv->gw);
|
ipa_ntoh(rv->gw);
|
||||||
|
|
||||||
neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
|
neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
|
||||||
|
@ -1240,10 +1267,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
|
||||||
{
|
{
|
||||||
case RTN_UNICAST:
|
case RTN_UNICAST:
|
||||||
|
|
||||||
if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
|
if (a[RTA_MULTIPATH])
|
||||||
{
|
{
|
||||||
ra->dest = RTD_MULTIPATH;
|
ra->dest = RTD_MULTIPATH;
|
||||||
ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
|
ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH], i->rtm_family);
|
||||||
if (!ra->nexthops)
|
if (!ra->nexthops)
|
||||||
{
|
{
|
||||||
log(L_ERR "KRT: Received strange multipath route %I/%d",
|
log(L_ERR "KRT: Received strange multipath route %I/%d",
|
||||||
|
@ -1385,8 +1412,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ideally, now we would send the received route to the rest of kernel code.
|
* Ideally, now we would send the received route to the rest of kernel code.
|
||||||
* But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it
|
* But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
|
||||||
* and merge next hops until the end of the sequence.
|
* postpone it and merge next hops until the end of the sequence. Note that
|
||||||
|
* proper multipath updates are rejected by nl_mergable_route(), so it is
|
||||||
|
* always the first case for them.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (!s->net)
|
if (!s->net)
|
||||||
|
|
Loading…
Reference in a new issue