From 1e37e35c3ea88672c677ea7ac63fe0b9df609b0c Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Wed, 22 Mar 2017 15:00:07 +0100 Subject: [PATCH] BGP: Support for MPLS labels and VPN SAFI Basic support for SAFI 4 and 128 (MPLS labeled IP and VPN) for IPv4 and IPv6. Should work for route reflector, but does not properly handle originating routes with next hop self. Based on patches from Jan Matejka. --- lib/net.h | 8 + lib/unaligned.h | 14 ++ nest/route.h | 10 +- nest/rt-table.c | 14 +- proto/bgp/attrs.c | 84 ++++++- proto/bgp/bgp.c | 11 +- proto/bgp/bgp.h | 20 +- proto/bgp/config.Y | 4 + proto/bgp/packets.c | 526 +++++++++++++++++++++++++++++++++++++++++--- 9 files changed, 639 insertions(+), 52 deletions(-) diff --git a/lib/net.h b/lib/net.h index 16a18479..ff889e99 100644 --- a/lib/net.h +++ b/lib/net.h @@ -35,6 +35,8 @@ #define NB_MPLS (1 << NET_MPLS) #define NB_IP (NB_IP4 | NB_IP6) +#define NB_VPN (NB_VPN4 | NB_VPN6) +#define NB_FLOW (NB_FLOW4 | NB_FLOW6) #define NB_ANY 0xffffffff @@ -481,6 +483,12 @@ static inline void net_normalize_ip4(net_addr_ip4 *n) static inline void net_normalize_ip6(net_addr_ip6 *n) { n->prefix = ip6_and(n->prefix, ip6_mkmask(n->pxlen)); } +static inline void net_normalize_vpn4(net_addr_vpn4 *n) +{ net_normalize_ip4((net_addr_ip4 *) n); } + +static inline void net_normalize_vpn6(net_addr_vpn6 *n) +{ net_normalize_ip6((net_addr_ip6 *) n); } + void net_normalize(net_addr *N); diff --git a/lib/unaligned.h b/lib/unaligned.h index 4e841f3a..0da1fdb4 100644 --- a/lib/unaligned.h +++ b/lib/unaligned.h @@ -28,6 +28,13 @@ get_u16(const void *p) return ntohs(x); } +static inline u32 +get_u24(const void *P) +{ + const byte *p = P; + return (p[0] << 16) + (p[1] << 8) + p[2]; +} + static inline u32 get_u32(const void *p) { @@ -52,6 +59,13 @@ put_u16(void *p, u16 x) memcpy(p, &x, 2); } +static inline void +put_u24(void *p, u32 x) +{ + x = htonl(x); + memcpy(p, ((char *) &x) + 1, 3); +} + static inline void put_u32(void *p, u32 x) { diff --git a/nest/route.h b/nest/route.h index 546b04c4..d7d4df69 100644 --- a/nest/route.h +++ b/nest/route.h @@ -551,7 +551,15 @@ static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); -void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls); + +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); + +static inline void +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) +{ + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); +} /* * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills diff --git a/nest/rt-table.c b/nest/rt-table.c index f8baf572..8be7520c 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1766,7 +1766,7 @@ rta_next_hop_outdated(rta *a) (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); } -static inline void +void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) { a->hostentry = he; @@ -1794,7 +1794,7 @@ no_nexthop: struct nexthop *nhp = NULL, *nhr = NULL; int skip_nexthop = 0; - + for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) { if (skip_nexthop) @@ -2475,7 +2475,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -static struct hostentry * +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; @@ -2489,17 +2489,11 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) if (ipa_equal(he->addr, a) && (he->tab == dep)) return he; - he = hc_new_hostentry(hc, a, ll, dep, k); + he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k); rt_update_hostentry(tab, he); return he; } -void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? gw : ll, dep), mls); -} - /* * CLI commands diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index f2a8e8b5..cf9db1c8 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -629,6 +629,75 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); } +static void +bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) +{ + net_addr *n = s->route->net->n.addr; + u32 *labels = (u32 *) a->u.ptr->data; + uint lnum = a->u.ptr->length / 4; + + /* Perhaps we should just ignore it? */ + if (!s->mpls) + WITHDRAW("Unexpected MPLS stack"); + + /* Empty MPLS stack is not allowed */ + if (!lnum) + WITHDRAW("Malformed MPLS stack - empty"); + + /* This is ugly, but we must ensure that labels fit into NLRI field */ + if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255) + WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum); + + for (uint i = 0; i < lnum; i++) + { + if (labels[i] > 0xfffff) + WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]); + + /* TODO: Check for special-purpose label values? */ + } +} + +static int +bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED) +{ + /* + * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute, + * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks. + */ + + s->mpls_labels = a->u.ptr; + return 0; +} + +static void +bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED) +{ + DISCARD("Discarding received attribute #0"); +} + +static void +bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size) +{ + u32 *labels = (u32 *) a->u.ptr->data; + uint lnum = a->u.ptr->length / 4; + char *pos = buf; + + for (uint i = 0; i < lnum; i++) + { + if (size < 20) + { + bsprintf(pos, "..."); + return; + } + + uint l = bsprintf(pos, "%d/", labels[i]); + ADVANCE(pos, size, l); + } + + /* Clear last slash or terminate empty string */ + pos[lnum ? -1 : 0] = 0; +} + static inline void bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) { @@ -763,6 +832,14 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .encode = bgp_encode_u32s, .decode = bgp_decode_large_community, }, + [BA_MPLS_LABEL_STACK] = { + .name = "mpls_label_stack", + .type = EAF_TYPE_INT_SET, + .export = bgp_export_mpls_label_stack, + .encode = bgp_encode_mpls_label_stack, + .decode = bgp_decode_mpls_label_stack, + .format = bgp_format_mpls_label_stack, + }, }; static inline int @@ -849,7 +926,6 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) return NULL; return new; - } @@ -1340,7 +1416,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { struct proto *SRC = e->attrs->src->proto; struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL; - struct bgp_export_state s = { .proto = p, .channel =c, .pool = pool, .src = src, .route = e }; + struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls }; ea_list *attrs = attrs0; eattr *a; adata *ad; @@ -1453,13 +1529,13 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea if (new) { - attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool); + attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool2); /* If attributes are invalid, we fail back to withdraw */ buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); path = new->attrs->src->global_id; - lp_flush(bgp_linpool); + lp_flush(bgp_linpool2); } else { diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 5e95e6b4..976fbd90 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -86,6 +86,7 @@ struct linpool *bgp_linpool; /* Global temporary pool */ +struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */ static list bgp_sockets; /* Global list of listening sockets */ @@ -151,7 +152,10 @@ bgp_open(struct bgp_proto *p) add_tail(&bgp_sockets, &bs->n); if (!bgp_linpool) - bgp_linpool = lp_new(proto_pool, 4080); + { + bgp_linpool = lp_new(proto_pool, 4080); + bgp_linpool2 = lp_new(proto_pool, 4080); + } return 0; @@ -187,6 +191,9 @@ bgp_close(struct bgp_proto *p) rfree(bgp_linpool); bgp_linpool = NULL; + + rfree(bgp_linpool2); + bgp_linpool2 = NULL; } static inline int @@ -1970,7 +1977,7 @@ struct protocol proto_bgp = { .template = "bgp%d", .attr_class = EAP_BGP, .preference = DEF_PREF_BGP, - .channel_mask = NB_IP | NB_FLOW4 | NB_FLOW6, + .channel_mask = NB_IP | NB_VPN | NB_FLOW, .proto_size = sizeof(struct bgp_proto), .config_size = sizeof(struct bgp_config), .postconfig = bgp_postconfig, diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index e7647625..36fd39e8 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -31,6 +31,8 @@ struct eattr; #define BGP_SAFI_UNICAST 1 #define BGP_SAFI_MULTICAST 2 +#define BGP_SAFI_MPLS 4 +#define BGP_SAFI_MPLS_VPN 128 #define BGP_SAFI_FLOW 133 /* Internal AF codes */ @@ -43,6 +45,10 @@ struct eattr; #define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST ) #define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST ) #define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST ) +#define BGP_AF_IPV4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS ) +#define BGP_AF_IPV6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS ) +#define BGP_AF_VPN4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS_VPN ) +#define BGP_AF_VPN6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS_VPN ) #define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW ) #define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW ) @@ -55,6 +61,7 @@ struct bgp_bucket; struct bgp_af_desc { u32 afi; u32 net; + int mpls; const char *name; uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size); void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); @@ -308,6 +315,7 @@ struct bgp_export_state { struct bgp_proto *src; rte *route; + int mpls; u32 attrs_seen[1]; uint err_withdraw; @@ -320,8 +328,10 @@ struct bgp_write_state { int as4_session; int add_path; + int mpls; eattr *mp_next_hop; + adata *mpls_labels; }; struct bgp_parse_state { @@ -331,14 +341,13 @@ struct bgp_parse_state { int as4_session; int add_path; + int mpls; u32 attrs_seen[256/32]; u32 mp_reach_af; u32 mp_unreach_af; - mpls_label_stack mls; - uint attr_len; uint ip_reach_len; uint ip_unreach_len; @@ -359,6 +368,9 @@ struct bgp_parse_state { uint err_subcode; jmp_buf err_jmpbuf; + struct hostentry *hostentry; + adata *mpls_labels; + /* Cached state for bgp_rte_update() */ u32 last_id; struct rte_src *last_src; @@ -392,6 +404,7 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode) } extern struct linpool *bgp_linpool; +extern struct linpool *bgp_linpool2; void bgp_start_timer(struct timer *t, int value); @@ -528,6 +541,9 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ #define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ +/* Bird's private internal BGP attributes */ +#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */ + /* BGP connection states */ #define BS_IDLE 0 diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 8c63b331..e23c5b3b 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -139,6 +139,10 @@ bgp_afi: | IPV6 { $$ = BGP_AF_IPV6; } | IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; } | IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; } + | IPV4 MPLS { $$ = BGP_AF_IPV4_MPLS; } + | IPV6 MPLS { $$ = BGP_AF_IPV6_MPLS; } + | VPN4 MPLS { $$ = BGP_AF_VPN4_MPLS; } + | VPN6 MPLS { $$ = BGP_AF_VPN6_MPLS; } | FLOW4 { $$ = BGP_AF_FLOW4; } | FLOW6 { $$ = BGP_AF_FLOW6; } ; diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index f7366804..5953c43a 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -32,6 +32,13 @@ #define BGP_RR_BEGIN 1 #define BGP_RR_END 2 +#define BGP_NLRI_MAX (4 + 1 + 32) + +#define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */ +#define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */ +#define BGP_MPLS_NULL 3 /* Implicit NULL label */ +#define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */ + static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS; static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS; @@ -282,8 +289,8 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) /* Create capability list in buffer */ /* - * Note that max length is ~ 20+14*af_count. With max 6 channels that is - * 104. Option limit is 253 and buffer size is 4096, so we cannot overflow + * Note that max length is ~ 20+14*af_count. With max 10 channels that is + * 160. Option limit is 253 and buffer size is 4096, so we cannot overflow * unless we add new capabilities or more AFs. */ @@ -722,6 +729,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) #define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" #define NO_NEXT_HOP "Missing NEXT_HOP attribute" +#define NO_LABEL_STACK "Missing MPLS stack" static void @@ -744,19 +752,56 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) WITHDRAW(BAD_NEXT_HOP); a->dest = RTD_UNICAST; - a->nh = (struct nexthop){ .gw = nbr->addr, .iface = nbr->iface }; - a->hostentry = NULL; - a->igp_metric = 0; + a->nh.gw = nbr->addr; + a->nh.iface = nbr->iface; } else /* GW_RECURSIVE */ { if (ipa_zero(gw)) WITHDRAW(BAD_NEXT_HOP); - rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll, &(s->mls)); + s->hostentry = rt_get_hostentry(c->igp_table, gw, ll, c->c.table); + + if (!s->mpls) + rta_apply_hostentry(a, s->hostentry, NULL); + + /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ } } +static void +bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) +{ + if (lnum > MPLS_MAX_LABEL_STACK) + { + REPORT("Too many MPLS labels ($u)", lnum); + + a->dest = RTD_UNREACHABLE; + a->hostentry = NULL; + a->nh = (struct nexthop) { }; + return; + } + + /* Handle implicit NULL as empty MPLS stack */ + if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL)) + lnum = 0; + + if (s->channel->cf->gw_mode == GW_DIRECT) + { + a->nh.labels = lnum; + memcpy(a->nh.label, labels, 4*lnum); + } + else /* GW_RECURSIVE */ + { + mpls_label_stack ms; + + ms.len = lnum; + memcpy(ms.stack, labels, 4*lnum); + rta_apply_hostentry(a, s->hostentry, &ms); + } +} + + static inline int bgp_use_next_hop(struct bgp_export_state *s, eattr *a) { @@ -810,13 +855,26 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) { if (bgp_use_gateway(s)) { - ip_addr nh[1] = { s->route->attrs->nh.gw }; + rta *ra = s->route->attrs; + ip_addr nh[1] = { ra->nh.gw }; bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); + + if (s->mpls) + { + u32 implicit_null = BGP_MPLS_NULL; + u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; + uint lnum = ra->nh.labels ? ra->nh.labels : 1; + bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); + } } else { ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); + + /* TODO: Use local MPLS assigned label */ + if (s->mpls) + bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK); } } @@ -834,6 +892,10 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1]))) WITHDRAW(BAD_NEXT_HOP); + + /* Just check if MPLS stack */ + if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK)) + WITHDRAW(NO_LABEL_STACK); } static uint @@ -905,14 +967,76 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) rte_update2(&s->channel->c, n, e, s->last_src); } +static void +bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen) +{ + u32 dummy = 0; + u32 *labels = mpls ? (u32 *) mpls->data : &dummy; + uint lnum = mpls ? (mpls->length / 4) : 1; + for (uint i = 0; i < lnum; i++) + { + put_u24(*pos, labels[i] << 4); + ADVANCE(*pos, *size, 3); + } + + /* Add bottom-of-stack flag */ + (*pos)[-1] |= BGP_MPLS_BOS; + + *pxlen += 24 * lnum; +} + +static void +bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) +{ + u32 labels[BGP_MPLS_MAX], label; + uint lnum = 0; + + do { + if (*pxlen < 24) + bgp_parse_error(s, 1); + + label = get_u24(*pos); + labels[lnum++] = label >> 4; + ADVANCE(*pos, *len, 3); + *pxlen -= 24; + + /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */ + if (!a && !s->err_withdraw && (lnum == 1) && (label == BGP_MPLS_MAGIC)) + break; + } + while (!(label & BGP_MPLS_BOS)); + + if (!a) + return; + + /* Attach MPLS attribute unless we already have one */ + if (!s->mpls_labels) + { + s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX); + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels); + } + + /* Overwrite data in the attribute */ + s->mpls_labels->length = 4*lnum; + memcpy(s->mpls_labels->data, labels, 4*lnum); + + /* Update next hop entry in rta */ + bgp_apply_mpls_labels(s, a, labels, lnum); + + /* Attributes were changed, invalidate cached entry */ + rta_free(s->cached_rta); + s->cached_rta = NULL; + + return; +} static uint bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) { byte *pos = buf; - while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip4_addr)))) + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) { struct bgp_prefix *px = HEAD(buck->prefixes); struct net_addr_ip4 *net = (void *) px->net; @@ -924,14 +1048,17 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu ADVANCE(pos, size, 4); } - ip4_addr a = ip4_hton(net->prefix); - uint b = (net->pxlen + 7) / 8; - /* Encode prefix length */ *pos = net->pxlen; ADVANCE(pos, size, 1); + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + /* Encode prefix body */ + ip4_addr a = ip4_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; memcpy(pos, &a, b); ADVANCE(pos, size, b); @@ -961,17 +1088,21 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode prefix length */ uint l = *pos; - uint b = (l + 7) / 8; ADVANCE(pos, len, 1); + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + if (l > IP4_MAX_PREFIX_LENGTH) bgp_parse_error(s, 10); - if (len < b) - bgp_parse_error(s, 1); - /* Decode prefix body */ ip4_addr addr = IP4_NONE; + uint b = (l + 7) / 8; memcpy(&addr, pos, b); ADVANCE(pos, len, b); @@ -1016,7 +1147,7 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu { byte *pos = buf; - while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip6_addr)))) + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) { struct bgp_prefix *px = HEAD(buck->prefixes); struct net_addr_ip6 *net = (void *) px->net; @@ -1028,14 +1159,17 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu ADVANCE(pos, size, 4); } - ip6_addr a = ip6_hton(net->prefix); - uint b = (net->pxlen + 7) / 8; - /* Encode prefix length */ *pos = net->pxlen; ADVANCE(pos, size, 1); + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + /* Encode prefix body */ + ip6_addr a = ip6_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; memcpy(pos, &a, b); ADVANCE(pos, size, b); @@ -1065,17 +1199,21 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode prefix length */ uint l = *pos; - uint b = (l + 7) / 8; ADVANCE(pos, len, 1); + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + if (l > IP6_MAX_PREFIX_LENGTH) bgp_parse_error(s, 10); - if (len < b) - bgp_parse_error(s, 1); - /* Decode prefix body */ ip6_addr addr = IP6_NONE; + uint b = (l + 7) / 8; memcpy(&addr, pos, b); ADVANCE(pos, len, b); @@ -1135,6 +1273,282 @@ bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a) } +static uint +bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_vpn4 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + /* Encode prefix length */ + *pos = net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode route distinguisher */ + put_u64(pos, net->rd); + ADVANCE(pos, size, 8); + + /* Encode prefix body */ + ip4_addr a = ip4_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + net_addr_vpn4 net; + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + /* Decode route distinguisher */ + if (l < 64) + bgp_parse_error(s, 1); + + u64 rd = get_u64(pos); + ADVANCE(pos, len, 8); + l -= 64; + + if (l > IP4_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip4_addr addr = IP4_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd); + net_normalize_vpn4(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } +} + +static uint +bgp_encode_next_hop_vpn4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED) +{ + /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */ + + ASSERT(a->u.ptr->length == sizeof(ip_addr)); + + put_u64(buf, 0); /* VPN RD is 0 */ + put_ip4(buf+8, ipa_to_ip4( *(ip_addr *) a->u.ptr->data )); + + return 12; +} + +static void +bgp_decode_next_hop_vpn4(struct bgp_parse_state *s, byte *data, uint len, rta *a) +{ + if (len != 12) + bgp_parse_error(s, 9); + + /* XXXX which error */ + if (get_u64(data) != 0) + bgp_parse_error(s, 9); + + ip_addr nh = ipa_from_ip4(get_ip4(data+8)); + + // XXXX validate next hop + + bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &nh, sizeof(nh)); + bgp_apply_next_hop(s, a, nh, IPA_NONE); +} + + +static uint +bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_vpn6 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + /* Encode prefix length */ + *pos = net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode route distinguisher */ + put_u64(pos, net->rd); + ADVANCE(pos, size, 8); + + /* Encode prefix body */ + ip6_addr a = ip6_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + net_addr_vpn6 net; + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + /* Decode route distinguisher */ + if (l < 64) + bgp_parse_error(s, 1); + + u64 rd = get_u64(pos); + ADVANCE(pos, len, 8); + l -= 64; + + if (l > IP6_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip6_addr addr = IP6_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd); + net_normalize_vpn6(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } +} + +static uint +bgp_encode_next_hop_vpn6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED) +{ + ip_addr *nh = (void *) a->u.ptr->data; + uint len = a->u.ptr->length; + + ASSERT((len == 16) || (len == 32)); + + put_u64(buf, 0); /* VPN RD is 0 */ + put_ip6(buf+8, ipa_to_ip6(nh[0])); + + if (len == 16) + return 24; + + put_u64(buf+24, 0); /* VPN RD is 0 */ + put_ip6(buf+32, ipa_to_ip6(nh[1])); + + return 48; +} + +static void +bgp_decode_next_hop_vpn6(struct bgp_parse_state *s, byte *data, uint len, rta *a) +{ + struct adata *ad = lp_alloc_adata(s->pool, 32); + ip_addr *nh = (void *) ad->data; + + if ((len != 24) && (len != 48)) + bgp_parse_error(s, 9); + + /* XXXX which error */ + if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0))) + bgp_parse_error(s, 9); + + nh[0] = ipa_from_ip6(get_ip6(data+8)); + nh[1] = (len == 48) ? ipa_from_ip6(get_ip6(data+32)) : IPA_NONE; + + if (ip6_is_link_local(nh[0])) + { + nh[1] = nh[0]; + nh[0] = IPA_NONE; + } + + if (!ip6_is_link_local(nh[1])) + nh[1] = IPA_NONE; + + if (ipa_zero(nh[1])) + ad->length = 16; + + // XXXX validate next hop + + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, a, nh[0], nh[1]); +} + + static uint bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) { @@ -1341,14 +1755,15 @@ static const struct bgp_af_desc bgp_af_table[] = { .update_next_hop = bgp_update_next_hop_ip, }, { - .afi = BGP_AF_FLOW4, - .net = NET_FLOW4, - .name = "flow4", - .encode_nlri = bgp_encode_nlri_flow4, - .decode_nlri = bgp_decode_nlri_flow4, - .encode_next_hop = bgp_encode_next_hop_none, - .decode_next_hop = bgp_decode_next_hop_none, - .update_next_hop = bgp_update_next_hop_none, + .afi = BGP_AF_IPV4_MPLS, + .net = NET_IP4, + .mpls = 1, + .name = "ipv4-mpls", + .encode_nlri = bgp_encode_nlri_ip4, + .decode_nlri = bgp_decode_nlri_ip4, + .encode_next_hop = bgp_encode_next_hop_ip4, + .decode_next_hop = bgp_decode_next_hop_ip4, + .update_next_hop = bgp_update_next_hop_ip, }, { .afi = BGP_AF_IPV6, @@ -1370,6 +1785,49 @@ static const struct bgp_af_desc bgp_af_table[] = { .decode_next_hop = bgp_decode_next_hop_ip6, .update_next_hop = bgp_update_next_hop_ip, }, + { + .afi = BGP_AF_IPV6_MPLS, + .net = NET_IP6, + .mpls = 1, + .name = "ipv6-mpls", + .encode_nlri = bgp_encode_nlri_ip6, + .decode_nlri = bgp_decode_nlri_ip6, + .encode_next_hop = bgp_encode_next_hop_ip6, + .decode_next_hop = bgp_decode_next_hop_ip6, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN4_MPLS, + .net = NET_VPN4, + .mpls = 1, + .name = "vpn4-mpls", + .encode_nlri = bgp_encode_nlri_vpn4, + .decode_nlri = bgp_decode_nlri_vpn4, + .encode_next_hop = bgp_encode_next_hop_vpn4, + .decode_next_hop = bgp_decode_next_hop_vpn4, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN6_MPLS, + .net = NET_VPN6, + .mpls = 1, + .name = "vpn6-mpls", + .encode_nlri = bgp_encode_nlri_vpn6, + .decode_nlri = bgp_decode_nlri_vpn6, + .encode_next_hop = bgp_encode_next_hop_vpn6, + .decode_next_hop = bgp_decode_next_hop_vpn6, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_FLOW4, + .net = NET_FLOW4, + .name = "flow4", + .encode_nlri = bgp_encode_nlri_flow4, + .decode_nlri = bgp_decode_nlri_flow4, + .encode_next_hop = bgp_encode_next_hop_none, + .decode_next_hop = bgp_decode_next_hop_none, + .update_next_hop = bgp_update_next_hop_none, + }, { .afi = BGP_AF_FLOW6, .net = NET_FLOW6, @@ -1566,6 +2024,8 @@ bgp_create_update(struct bgp_channel *c, byte *buf) byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH); byte *res = NULL; +again: ; + /* Initialize write state */ struct bgp_write_state s = { .proto = p, @@ -1573,10 +2033,9 @@ bgp_create_update(struct bgp_channel *c, byte *buf) .pool = bgp_linpool, .as4_session = p->as4_session, .add_path = c->add_path_tx, + .mpls = c->desc->mpls, }; -again: - /* Try unreachable bucket */ if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { @@ -1692,6 +2151,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis s->channel = c; s->add_path = c->add_path_rx; + s->mpls = c->desc->mpls; s->last_id = 0; s->last_src = s->proto->p.main_source;