diff --git a/lib/net.h b/lib/net.h index 16a18479..ff889e99 100644 --- a/lib/net.h +++ b/lib/net.h @@ -35,6 +35,8 @@ #define NB_MPLS (1 << NET_MPLS) #define NB_IP (NB_IP4 | NB_IP6) +#define NB_VPN (NB_VPN4 | NB_VPN6) +#define NB_FLOW (NB_FLOW4 | NB_FLOW6) #define NB_ANY 0xffffffff @@ -481,6 +483,12 @@ static inline void net_normalize_ip4(net_addr_ip4 *n) static inline void net_normalize_ip6(net_addr_ip6 *n) { n->prefix = ip6_and(n->prefix, ip6_mkmask(n->pxlen)); } +static inline void net_normalize_vpn4(net_addr_vpn4 *n) +{ net_normalize_ip4((net_addr_ip4 *) n); } + +static inline void net_normalize_vpn6(net_addr_vpn6 *n) +{ net_normalize_ip6((net_addr_ip6 *) n); } + void net_normalize(net_addr *N); diff --git a/lib/unaligned.h b/lib/unaligned.h index 4e841f3a..0da1fdb4 100644 --- a/lib/unaligned.h +++ b/lib/unaligned.h @@ -28,6 +28,13 @@ get_u16(const void *p) return ntohs(x); } +static inline u32 +get_u24(const void *P) +{ + const byte *p = P; + return (p[0] << 16) + (p[1] << 8) + p[2]; +} + static inline u32 get_u32(const void *p) { @@ -52,6 +59,13 @@ put_u16(void *p, u16 x) memcpy(p, &x, 2); } +static inline void +put_u24(void *p, u32 x) +{ + x = htonl(x); + memcpy(p, ((char *) &x) + 1, 3); +} + static inline void put_u32(void *p, u32 x) { diff --git a/nest/route.h b/nest/route.h index 546b04c4..d7d4df69 100644 --- a/nest/route.h +++ b/nest/route.h @@ -551,7 +551,15 @@ static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? 
rta void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); -void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls); + +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); + +static inline void +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) +{ + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); +} /* * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills diff --git a/nest/rt-table.c b/nest/rt-table.c index f8baf572..8be7520c 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1766,7 +1766,7 @@ rta_next_hop_outdated(rta *a) (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); } -static inline void +void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) { a->hostentry = he; @@ -1794,7 +1794,7 @@ no_nexthop: struct nexthop *nhp = NULL, *nhr = NULL; int skip_nexthop = 0; - + for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) { if (skip_nexthop) @@ -2475,7 +2475,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -static struct hostentry * +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; @@ -2489,17 +2489,11 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) if (ipa_equal(he->addr, a) && (he->tab == dep)) return he; - he = hc_new_hostentry(hc, a, ll, dep, k); + he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k); rt_update_hostentry(tab, he); return he; } -void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? 
gw : ll, dep), mls); -} - /* * CLI commands diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index f2a8e8b5..cf9db1c8 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -629,6 +629,75 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); } +static void +bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) +{ + net_addr *n = s->route->net->n.addr; + u32 *labels = (u32 *) a->u.ptr->data; + uint lnum = a->u.ptr->length / 4; + + /* Perhaps we should just ignore it? */ + if (!s->mpls) + WITHDRAW("Unexpected MPLS stack"); + + /* Empty MPLS stack is not allowed */ + if (!lnum) + WITHDRAW("Malformed MPLS stack - empty"); + + /* This is ugly, but we must ensure that labels fit into NLRI field */ + if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255) + WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum); + + for (uint i = 0; i < lnum; i++) + { + if (labels[i] > 0xfffff) + WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]); + + /* TODO: Check for special-purpose label values? */ + } +} + +static int +bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED) +{ + /* + * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute, + * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks. 
+ */ + + s->mpls_labels = a->u.ptr; + return 0; +} + +static void +bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED) +{ + DISCARD("Discarding received attribute #0"); +} + +static void +bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size) +{ + u32 *labels = (u32 *) a->u.ptr->data; + uint lnum = a->u.ptr->length / 4; + char *pos = buf; + + for (uint i = 0; i < lnum; i++) + { + if (size < 20) /* presumably headroom for one more "label/" plus the terminator -- TODO confirm */ + { + bsprintf(pos, "..."); + return; + } + + uint l = bsprintf(pos, "%u/", labels[i]); /* labels are u32, print as unsigned like the export check does */ + ADVANCE(pos, size, l); + } + + /* Clear last slash or terminate empty string */ + pos[lnum ? -1 : 0] = 0; +} + static inline void bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) { @@ -763,6 +832,14 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .encode = bgp_encode_u32s, .decode = bgp_decode_large_community, }, + [BA_MPLS_LABEL_STACK] = { + .name = "mpls_label_stack", + .type = EAF_TYPE_INT_SET, + .export = bgp_export_mpls_label_stack, + .encode = bgp_encode_mpls_label_stack, + .decode = bgp_decode_mpls_label_stack, + .format = bgp_format_mpls_label_stack, + }, }; static inline int @@ -849,7 +926,6 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) return NULL; return new; - } @@ -1340,7 +1416,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { struct proto *SRC = e->attrs->src->proto; struct bgp_proto *src = (SRC->proto == &proto_bgp) ? 
(void *) SRC : NULL; - struct bgp_export_state s = { .proto = p, .channel =c, .pool = pool, .src = src, .route = e }; + struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls }; ea_list *attrs = attrs0; eattr *a; adata *ad; @@ -1453,13 +1529,13 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea if (new) { - attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool); + attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool2); /* If attributes are invalid, we fail back to withdraw */ buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); path = new->attrs->src->global_id; - lp_flush(bgp_linpool); + lp_flush(bgp_linpool2); } else { diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 5e95e6b4..976fbd90 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -86,6 +86,7 @@ struct linpool *bgp_linpool; /* Global temporary pool */ +struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */ static list bgp_sockets; /* Global list of listening sockets */ @@ -151,7 +152,10 @@ bgp_open(struct bgp_proto *p) add_tail(&bgp_sockets, &bs->n); if (!bgp_linpool) - bgp_linpool = lp_new(proto_pool, 4080); + { + bgp_linpool = lp_new(proto_pool, 4080); + bgp_linpool2 = lp_new(proto_pool, 4080); + } return 0; @@ -187,6 +191,9 @@ bgp_close(struct bgp_proto *p) rfree(bgp_linpool); bgp_linpool = NULL; + + rfree(bgp_linpool2); + bgp_linpool2 = NULL; } static inline int @@ -1970,7 +1977,7 @@ struct protocol proto_bgp = { .template = "bgp%d", .attr_class = EAP_BGP, .preference = DEF_PREF_BGP, - .channel_mask = NB_IP | NB_FLOW4 | NB_FLOW6, + .channel_mask = NB_IP | NB_VPN | NB_FLOW, .proto_size = sizeof(struct bgp_proto), .config_size = sizeof(struct bgp_config), .postconfig = bgp_postconfig, diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index e7647625..36fd39e8 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -31,6 +31,8 @@ struct eattr; #define BGP_SAFI_UNICAST 1 
#define BGP_SAFI_MULTICAST 2 +#define BGP_SAFI_MPLS 4 +#define BGP_SAFI_MPLS_VPN 128 #define BGP_SAFI_FLOW 133 /* Internal AF codes */ @@ -43,6 +45,10 @@ struct eattr; #define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST ) #define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST ) #define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST ) +#define BGP_AF_IPV4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS ) +#define BGP_AF_IPV6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS ) +#define BGP_AF_VPN4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS_VPN ) +#define BGP_AF_VPN6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS_VPN ) #define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW ) #define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW ) @@ -55,6 +61,7 @@ struct bgp_bucket; struct bgp_af_desc { u32 afi; u32 net; + int mpls; const char *name; uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size); void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); @@ -308,6 +315,7 @@ struct bgp_export_state { struct bgp_proto *src; rte *route; + int mpls; u32 attrs_seen[1]; uint err_withdraw; @@ -320,8 +328,10 @@ struct bgp_write_state { int as4_session; int add_path; + int mpls; eattr *mp_next_hop; + adata *mpls_labels; }; struct bgp_parse_state { @@ -331,14 +341,13 @@ struct bgp_parse_state { int as4_session; int add_path; + int mpls; u32 attrs_seen[256/32]; u32 mp_reach_af; u32 mp_unreach_af; - mpls_label_stack mls; - uint attr_len; uint ip_reach_len; uint ip_unreach_len; @@ -359,6 +368,9 @@ struct bgp_parse_state { uint err_subcode; jmp_buf err_jmpbuf; + struct hostentry *hostentry; + adata *mpls_labels; + /* Cached state for bgp_rte_update() */ u32 last_id; struct rte_src *last_src; @@ -392,6 +404,7 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode) } extern struct linpool *bgp_linpool; +extern struct linpool *bgp_linpool2; void bgp_start_timer(struct timer *t, int value); @@ -528,6 +541,9 @@ void 
bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ #define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ +/* Bird's private internal BGP attributes */ +#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */ + /* BGP connection states */ #define BS_IDLE 0 diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 8c63b331..e23c5b3b 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -139,6 +139,10 @@ bgp_afi: | IPV6 { $$ = BGP_AF_IPV6; } | IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; } | IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; } + | IPV4 MPLS { $$ = BGP_AF_IPV4_MPLS; } + | IPV6 MPLS { $$ = BGP_AF_IPV6_MPLS; } + | VPN4 MPLS { $$ = BGP_AF_VPN4_MPLS; } + | VPN6 MPLS { $$ = BGP_AF_VPN6_MPLS; } | FLOW4 { $$ = BGP_AF_FLOW4; } | FLOW6 { $$ = BGP_AF_FLOW6; } ; diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index f7366804..5953c43a 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -32,6 +32,13 @@ #define BGP_RR_BEGIN 1 #define BGP_RR_END 2 +#define BGP_NLRI_MAX (4 + 1 + 32) + +#define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */ +#define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */ +#define BGP_MPLS_NULL 3 /* Implicit NULL label */ +#define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */ + static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS; static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS; @@ -282,8 +289,8 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) /* Create capability list in buffer */ /* - * Note that max length is ~ 20+14*af_count. With max 6 channels that is - * 104. Option limit is 253 and buffer size is 4096, so we cannot overflow + * Note that max length is ~ 20+14*af_count. With max 10 channels that is + * 160. Option limit is 253 and buffer size is 4096, so we cannot overflow * unless we add new capabilities or more AFs. 
*/ @@ -722,6 +729,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) #define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" #define NO_NEXT_HOP "Missing NEXT_HOP attribute" +#define NO_LABEL_STACK "Missing MPLS stack" static void @@ -744,19 +752,56 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) WITHDRAW(BAD_NEXT_HOP); a->dest = RTD_UNICAST; - a->nh = (struct nexthop){ .gw = nbr->addr, .iface = nbr->iface }; - a->hostentry = NULL; - a->igp_metric = 0; + a->nh.gw = nbr->addr; + a->nh.iface = nbr->iface; } else /* GW_RECURSIVE */ { if (ipa_zero(gw)) WITHDRAW(BAD_NEXT_HOP); - rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll, &(s->mls)); + s->hostentry = rt_get_hostentry(c->igp_table, gw, ll, c->c.table); + + if (!s->mpls) + rta_apply_hostentry(a, s->hostentry, NULL); + + /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ } } +static void +bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) +{ + if (lnum > MPLS_MAX_LABEL_STACK) /* checked before labels are copied into a->nh.label or ms.stack below */ + { + REPORT("Too many MPLS labels (%u)", lnum); + + a->dest = RTD_UNREACHABLE; + a->hostentry = NULL; + a->nh = (struct nexthop) { }; + return; + } + + /* Handle implicit NULL as empty MPLS stack */ + if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL)) + lnum = 0; + + if (s->channel->cf->gw_mode == GW_DIRECT) + { + a->nh.labels = lnum; + memcpy(a->nh.label, labels, 4*lnum); + } + else /* GW_RECURSIVE */ + { + mpls_label_stack ms; + + ms.len = lnum; + memcpy(ms.stack, labels, 4*lnum); + rta_apply_hostentry(a, s->hostentry, &ms); + } +} + + static inline int bgp_use_next_hop(struct bgp_export_state *s, eattr *a) { @@ -810,13 +855,26 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) { if (bgp_use_gateway(s)) { - ip_addr nh[1] = { s->route->attrs->nh.gw }; + rta *ra = s->route->attrs; + ip_addr nh[1] = { ra->nh.gw }; bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); + + 
if (s->mpls) + { + u32 implicit_null = BGP_MPLS_NULL; + u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; + uint lnum = ra->nh.labels ? ra->nh.labels : 1; + bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); + } } else { ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); + + /* TODO: Use local MPLS assigned label */ + if (s->mpls) + bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK); } } @@ -834,6 +892,10 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1]))) WITHDRAW(BAD_NEXT_HOP); + + /* Just check if MPLS stack */ + if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK)) + WITHDRAW(NO_LABEL_STACK); } static uint @@ -905,14 +967,76 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) rte_update2(&s->channel->c, n, e, s->last_src); } +static void +bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen) +{ + u32 dummy = 0; + u32 *labels = mpls ? (u32 *) mpls->data : &dummy; + uint lnum = mpls ? 
(mpls->length / 4) : 1; + for (uint i = 0; i < lnum; i++) + { + put_u24(*pos, labels[i] << 4); + ADVANCE(*pos, *size, 3); + } + + /* Add bottom-of-stack flag */ + (*pos)[-1] |= BGP_MPLS_BOS; + + *pxlen += 24 * lnum; +} + +static void +bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) +{ + u32 labels[BGP_MPLS_MAX], label; + uint lnum = 0; + + do { + if (*pxlen < 24) + bgp_parse_error(s, 1); + + label = get_u24(*pos); + labels[lnum++] = label >> 4; + ADVANCE(*pos, *len, 3); + *pxlen -= 24; + + /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */ + if (!a && !s->err_withdraw && (lnum == 1) && (label == BGP_MPLS_MAGIC)) + break; + } + while (!(label & BGP_MPLS_BOS)); + + if (!a) + return; + + /* Attach MPLS attribute unless we already have one */ + if (!s->mpls_labels) + { + s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX); + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels); + } + + /* Overwrite data in the attribute */ + s->mpls_labels->length = 4*lnum; + memcpy(s->mpls_labels->data, labels, 4*lnum); + + /* Update next hop entry in rta */ + bgp_apply_mpls_labels(s, a, labels, lnum); + + /* Attributes were changed, invalidate cached entry */ + rta_free(s->cached_rta); + s->cached_rta = NULL; + + return; +} static uint bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) { byte *pos = buf; - while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip4_addr)))) + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) { struct bgp_prefix *px = HEAD(buck->prefixes); struct net_addr_ip4 *net = (void *) px->net; @@ -924,14 +1048,17 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu ADVANCE(pos, size, 4); } - ip4_addr a = ip4_hton(net->prefix); - uint b = (net->pxlen + 7) / 8; - /* Encode prefix length */ *pos = net->pxlen; ADVANCE(pos, size, 1); + /* Encode MPLS 
labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + /* Encode prefix body */ + ip4_addr a = ip4_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; memcpy(pos, &a, b); ADVANCE(pos, size, b); @@ -961,17 +1088,21 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode prefix length */ uint l = *pos; - uint b = (l + 7) / 8; ADVANCE(pos, len, 1); + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + if (l > IP4_MAX_PREFIX_LENGTH) bgp_parse_error(s, 10); - if (len < b) - bgp_parse_error(s, 1); - /* Decode prefix body */ ip4_addr addr = IP4_NONE; + uint b = (l + 7) / 8; memcpy(&addr, pos, b); ADVANCE(pos, len, b); @@ -1016,7 +1147,7 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu { byte *pos = buf; - while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip6_addr)))) + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) { struct bgp_prefix *px = HEAD(buck->prefixes); struct net_addr_ip6 *net = (void *) px->net; @@ -1028,14 +1159,17 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu ADVANCE(pos, size, 4); } - ip6_addr a = ip6_hton(net->prefix); - uint b = (net->pxlen + 7) / 8; - /* Encode prefix length */ *pos = net->pxlen; ADVANCE(pos, size, 1); + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + /* Encode prefix body */ + ip6_addr a = ip6_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; memcpy(pos, &a, b); ADVANCE(pos, size, b); @@ -1065,17 +1199,21 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode prefix length */ uint l = *pos; - uint b = (l + 7) / 8; ADVANCE(pos, len, 1); + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + if (l > 
IP6_MAX_PREFIX_LENGTH) bgp_parse_error(s, 10); - if (len < b) - bgp_parse_error(s, 1); - /* Decode prefix body */ ip6_addr addr = IP6_NONE; + uint b = (l + 7) / 8; memcpy(&addr, pos, b); ADVANCE(pos, len, b); @@ -1135,6 +1273,282 @@ bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a) } +static uint +bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_vpn4 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + /* Encode prefix length: 64-bit RD + IP prefix; bgp_encode_mpls_labels() adds the label bits */ + *pos = 64 + net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode route distinguisher */ + put_u64(pos, net->rd); + ADVANCE(pos, size, 8); + + /* Encode prefix body */ + ip4_addr a = ip4_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + net_addr_vpn4 net; + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + /* Decode route distinguisher */ + if (l < 64) + bgp_parse_error(s, 1); + + u64 rd = get_u64(pos); + ADVANCE(pos, len, 8); + l -= 64; + + if (l > IP4_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip4_addr addr = IP4_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + 
ADVANCE(pos, len, b); + + net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd); + net_normalize_vpn4(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } +} + +static uint +bgp_encode_next_hop_vpn4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED) +{ + /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */ + + ASSERT(a->u.ptr->length == sizeof(ip_addr)); + + put_u64(buf, 0); /* VPN RD is 0 */ + put_ip4(buf+8, ipa_to_ip4( *(ip_addr *) a->u.ptr->data )); + + return 12; +} + +static void +bgp_decode_next_hop_vpn4(struct bgp_parse_state *s, byte *data, uint len, rta *a) +{ + if (len != 12) + bgp_parse_error(s, 9); + + /* XXXX which error */ + if (get_u64(data) != 0) + bgp_parse_error(s, 9); + + ip_addr nh = ipa_from_ip4(get_ip4(data+8)); + + // XXXX validate next hop + + bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &nh, sizeof(nh)); + bgp_apply_next_hop(s, a, nh, IPA_NONE); +} + + +static uint +bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_vpn6 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + /* Encode prefix length: 64-bit RD + IP prefix; bgp_encode_mpls_labels() adds the label bits */ + *pos = 64 + net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode route distinguisher */ + put_u64(pos, net->rd); + ADVANCE(pos, size, 8); + + /* Encode prefix body */ + ip6_addr a = ip6_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + net_addr_vpn6 net; + 
u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + /* Decode route distinguisher */ + if (l < 64) + bgp_parse_error(s, 1); + + u64 rd = get_u64(pos); + ADVANCE(pos, len, 8); + l -= 64; + + if (l > IP6_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip6_addr addr = IP6_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd); + net_normalize_vpn6(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } +} + +static uint +bgp_encode_next_hop_vpn6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED) +{ + ip_addr *nh = (void *) a->u.ptr->data; + uint len = a->u.ptr->length; + + ASSERT((len == 16) || (len == 32)); + + put_u64(buf, 0); /* VPN RD is 0 */ + put_ip6(buf+8, ipa_to_ip6(nh[0])); + + if (len == 16) + return 24; + + put_u64(buf+24, 0); /* VPN RD is 0 */ + put_ip6(buf+32, ipa_to_ip6(nh[1])); + + return 48; +} + +static void +bgp_decode_next_hop_vpn6(struct bgp_parse_state *s, byte *data, uint len, rta *a) +{ + struct adata *ad = lp_alloc_adata(s->pool, 32); + ip_addr *nh = (void *) ad->data; + + if ((len != 24) && (len != 48)) + bgp_parse_error(s, 9); + + /* XXXX which error */ + if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0))) + bgp_parse_error(s, 9); + + nh[0] = ipa_from_ip6(get_ip6(data+8)); + nh[1] = (len == 48) ? 
ipa_from_ip6(get_ip6(data+32)) : IPA_NONE; + + if (ip6_is_link_local(nh[0])) + { + nh[1] = nh[0]; + nh[0] = IPA_NONE; + } + + if (!ip6_is_link_local(nh[1])) + nh[1] = IPA_NONE; + + if (ipa_zero(nh[1])) + ad->length = 16; + + // XXXX validate next hop + + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, a, nh[0], nh[1]); +} + + static uint bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) { @@ -1341,14 +1755,15 @@ static const struct bgp_af_desc bgp_af_table[] = { .update_next_hop = bgp_update_next_hop_ip, }, { - .afi = BGP_AF_FLOW4, - .net = NET_FLOW4, - .name = "flow4", - .encode_nlri = bgp_encode_nlri_flow4, - .decode_nlri = bgp_decode_nlri_flow4, - .encode_next_hop = bgp_encode_next_hop_none, - .decode_next_hop = bgp_decode_next_hop_none, - .update_next_hop = bgp_update_next_hop_none, + .afi = BGP_AF_IPV4_MPLS, + .net = NET_IP4, + .mpls = 1, + .name = "ipv4-mpls", + .encode_nlri = bgp_encode_nlri_ip4, + .decode_nlri = bgp_decode_nlri_ip4, + .encode_next_hop = bgp_encode_next_hop_ip4, + .decode_next_hop = bgp_decode_next_hop_ip4, + .update_next_hop = bgp_update_next_hop_ip, }, { .afi = BGP_AF_IPV6, @@ -1370,6 +1785,49 @@ static const struct bgp_af_desc bgp_af_table[] = { .decode_next_hop = bgp_decode_next_hop_ip6, .update_next_hop = bgp_update_next_hop_ip, }, + { + .afi = BGP_AF_IPV6_MPLS, + .net = NET_IP6, + .mpls = 1, + .name = "ipv6-mpls", + .encode_nlri = bgp_encode_nlri_ip6, + .decode_nlri = bgp_decode_nlri_ip6, + .encode_next_hop = bgp_encode_next_hop_ip6, + .decode_next_hop = bgp_decode_next_hop_ip6, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN4_MPLS, + .net = NET_VPN4, + .mpls = 1, + .name = "vpn4-mpls", + .encode_nlri = bgp_encode_nlri_vpn4, + .decode_nlri = bgp_decode_nlri_vpn4, + .encode_next_hop = bgp_encode_next_hop_vpn4, + .decode_next_hop = bgp_decode_next_hop_vpn4, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = 
BGP_AF_VPN6_MPLS, + .net = NET_VPN6, + .mpls = 1, + .name = "vpn6-mpls", + .encode_nlri = bgp_encode_nlri_vpn6, + .decode_nlri = bgp_decode_nlri_vpn6, + .encode_next_hop = bgp_encode_next_hop_vpn6, + .decode_next_hop = bgp_decode_next_hop_vpn6, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_FLOW4, + .net = NET_FLOW4, + .name = "flow4", + .encode_nlri = bgp_encode_nlri_flow4, + .decode_nlri = bgp_decode_nlri_flow4, + .encode_next_hop = bgp_encode_next_hop_none, + .decode_next_hop = bgp_decode_next_hop_none, + .update_next_hop = bgp_update_next_hop_none, + }, { .afi = BGP_AF_FLOW6, .net = NET_FLOW6, @@ -1566,6 +2024,8 @@ bgp_create_update(struct bgp_channel *c, byte *buf) byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH); byte *res = NULL; +again: ; + /* Initialize write state */ struct bgp_write_state s = { .proto = p, @@ -1573,10 +2033,9 @@ bgp_create_update(struct bgp_channel *c, byte *buf) .pool = bgp_linpool, .as4_session = p->as4_session, .add_path = c->add_path_tx, + .mpls = c->desc->mpls, }; -again: - /* Try unreachable bucket */ if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { @@ -1692,6 +2151,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis s->channel = c; s->add_path = c->add_path_rx; + s->mpls = c->desc->mpls; s->last_id = 0; s->last_src = s->proto->p.main_source;