diff --git a/doc/bird.sgml b/doc/bird.sgml index bcf1c8fb..fc5fc9ae 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1804,13 +1804,17 @@ using the following configuration parameters: other means. Default: 0 (no local AS number allowed). enable route refresh - When BGP speaker changes its import filter, it has to re-examine all - routes received from its neighbor against the new filter. As these - routes might not be available, there is a BGP protocol extension Route - Refresh (specified in RFC 2918) that allows BGP speaker to request - re-advertisement of all routes from its neighbor. This option specifies - whether BIRD advertises this capability and accepts such requests. Even - when disabled, BIRD can send route refresh requests. Default: on. + After the initial route exchange, BGP protocol uses incremental updates + to keep BGP speakers synchronized. Sometimes (e.g., if BGP speaker + changes its import filter, or if there is suspicion of inconsistency) it + is necessary to do a new complete route exchange. BGP protocol extension + Route Refresh (RFC 2918) allows BGP speaker to request re-advertisement + of all routes from its neighbor. BGP protocol extension Enhanced Route + Refresh (RFC 7313) specifies explicit begin and end for such exchanges, + therefore the receiver can remove stale routes that were not advertised + during the exchange. This option specifies whether BIRD advertises these + capabilities and supports related procedures. Note that even when + disabled, BIRD can send route refresh requests. Default: on. 
graceful restart When a BGP speaker restarts or crashes, neighbors will discard all diff --git a/nest/proto.c b/nest/proto.c index 7339e4f4..44cfb637 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -942,8 +942,8 @@ proto_feed_more(void *P) p->export_state = ES_READY; proto_log_state_change(p); - if (p->feed_done) - p->feed_done(p); + if (p->feed_end) + p->feed_end(p); } else { @@ -976,6 +976,9 @@ proto_schedule_feed(struct proto *p, int initial) p->attn->hook = initial ? proto_feed_initial : proto_feed_more; ev_schedule(p->attn); + + if (p->feed_begin) + p->feed_begin(p, initial); } /* diff --git a/nest/protocol.h b/nest/protocol.h index f46e0b13..8660cc2c 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -179,7 +179,8 @@ struct proto { * reload_routes Request protocol to reload all its routes to the core * (using rte_update()). Returns: 0=reload cannot be done, * 1= reload is scheduled and will happen (asynchronously). - * feed_done Notify protocol about finish of route feeding. + * feed_begin Notify protocol about beginning of route feeding. + * feed_end Notify protocol about finish of route feeding. 
*/ void (*if_notify)(struct proto *, unsigned flags, struct iface *i); @@ -190,7 +191,8 @@ struct proto { void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs); int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool); int (*reload_routes)(struct proto *); - void (*feed_done)(struct proto *); + void (*feed_begin)(struct proto *, int initial); + void (*feed_end)(struct proto *); /* * Routing entry hooks (called only for routes belonging to this protocol): diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 050f737f..e48b643b 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -377,6 +377,8 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) p->conn = conn; p->last_error_class = 0; p->last_error_code = 0; + p->feed_state = BFS_NONE; + p->load_state = BFS_NONE; bgp_init_bucket_table(p); bgp_init_prefix_table(p, 8); @@ -394,6 +396,12 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING))) bgp_graceful_restart_done(p); + /* GR capability implies that neighbor will send End-of-RIB */ + if (conn->peer_gr_aware) + p->load_state = BFS_LOADING; + + /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */ + bgp_conn_set_state(conn, BS_ESTABLISHED); proto_notify_state(&p->p, PS_UP); } @@ -504,6 +512,47 @@ bgp_graceful_restart_timeout(timer *t) bgp_stop(p, 0); } + +/** + * bgp_refresh_begin - start incoming enhanced route refresh sequence + * @p: BGP instance + * + * This function is called when an incoming enhanced route refresh sequence is + * started by the neighbor, demarcated by the BoRR packet. The function updates + * the load state and starts the routing table refresh cycle. Note that graceful + * restart also uses routing table refresh cycle, but RFC 7313 and load states + * ensure that these two sequences do not overlap. 
+ */ +void +bgp_refresh_begin(struct bgp_proto *p) +{ + if (p->load_state == BFS_LOADING) + { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } + + p->load_state = BFS_REFRESHING; + rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); +} + +/** + * bgp_refresh_end - finish incoming enhanced route refresh sequence + * @p: BGP instance + * + * This function is called when an incoming enhanced route refresh sequence is + * finished by the neighbor, demarcated by the EoRR packet. The function updates + * the load state and ends the routing table refresh cycle. Routes not received + * during the sequence are removed by the nest. + */ +void +bgp_refresh_end(struct bgp_proto *p) +{ + if (p->load_state != BFS_REFRESHING) + { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } + + p->load_state = BFS_NONE; + rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); +} + + static void bgp_send_open(struct bgp_conn *conn) { @@ -514,6 +563,7 @@ bgp_send_open(struct bgp_conn *conn) conn->peer_refresh_support = 0; conn->peer_as4_support = 0; conn->peer_add_path = 0; + conn->peer_enhanced_refresh_support = 0; conn->peer_gr_aware = 0; conn->peer_gr_able = 0; conn->peer_gr_time = 0; @@ -959,16 +1009,56 @@ bgp_reload_routes(struct proto *P) } static void -bgp_feed_done(struct proto *P) +bgp_feed_begin(struct proto *P, int initial) { struct bgp_proto *p = (struct bgp_proto *) P; - if (!p->conn || !p->cf->gr_mode || p->p.refeeding) + + /* This should not happen */ + if (!p->conn) return; - p->send_end_mark = 1; + if (initial && p->cf->gr_mode) + p->feed_state = BFS_LOADING; + + /* It is refeed and both sides support enhanced route refresh */ + if (!initial && p->cf->enable_refresh && + p->conn->peer_enhanced_refresh_support) + { + /* BoRR must not be sent before End-of-RIB */ + if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED) + return; + + p->feed_state = BFS_REFRESHING; + 
bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH); + } +} + +static void +bgp_feed_end(struct proto *P) +{ + struct bgp_proto *p = (struct bgp_proto *) P; + + /* This should not happen */ + if (!p->conn) + return; + + /* Non-demarcated feed ended, nothing to do */ + if (p->feed_state == BFS_NONE) + return; + + /* Schedule End-of-RIB packet */ + if (p->feed_state == BFS_LOADING) + p->feed_state = BFS_LOADED; + + /* Schedule EoRR packet */ + if (p->feed_state == BFS_REFRESHING) + p->feed_state = BFS_REFRESHED; + + /* Kick TX hook */ bgp_schedule_packet(p->conn, PKT_UPDATE); } + static void bgp_start_locked(struct object_lock *lock) { @@ -1150,7 +1240,8 @@ bgp_init(struct proto_config *C) P->import_control = bgp_import_control; P->neigh_notify = bgp_neigh_notify; P->reload_routes = bgp_reload_routes; - P->feed_done = bgp_feed_done; + P->feed_begin = bgp_feed_begin; + P->feed_end = bgp_feed_end; P->rte_better = bgp_rte_better; P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL; @@ -1426,8 +1517,9 @@ bgp_show_proto_info(struct proto *P) else if (P->proto_state == PS_UP) { cli_msg(-1006, " Neighbor ID: %R", p->remote_id); - cli_msg(-1006, " Neighbor caps: %s%s%s%s%s", + cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s", c->peer_refresh_support ? " refresh" : "", + c->peer_enhanced_refresh_support ? " enhanced-refresh" : "", c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""), c->peer_as4_support ? " AS4" : "", (c->peer_add_path & ADD_PATH_RX) ? 
" add-path-rx" : "", diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 2c2b02b8..f4f21226 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -103,6 +103,7 @@ struct bgp_conn { u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */ u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */ u8 peer_add_path; /* Peer supports ADD-PATH [draft] */ + u8 peer_enhanced_refresh_support; /* Peer supports enhanced refresh [RFC7313] */ u8 peer_gr_aware; u8 peer_gr_able; u16 peer_gr_time; @@ -127,6 +128,8 @@ struct bgp_proto { int rs_client; /* Whether neighbor is RS client of me */ u8 gr_ready; /* Neighbor could do graceful restart */ u8 gr_active; /* Neighbor is doing graceful restart */ + u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */ + u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */ struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ @@ -144,7 +147,6 @@ struct bgp_proto { slab *prefix_slab; /* Slab holding prefix nodes */ list bucket_queue; /* Queue of buckets to send */ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ - unsigned send_end_mark; /* End-of-RIB mark scheduled for transmit */ unsigned startup_delay; /* Time to delay protocol startup by due to errors */ bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ @@ -196,6 +198,8 @@ void bgp_conn_enter_close_state(struct bgp_conn *conn); void bgp_conn_enter_idle_state(struct bgp_conn *conn); void bgp_handle_graceful_restart(struct bgp_proto *p); void bgp_graceful_restart_done(struct bgp_proto *p); +void bgp_refresh_begin(struct bgp_proto *p); +void bgp_refresh_end(struct bgp_proto *p); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); void 
bgp_stop(struct bgp_proto *p, unsigned subcode); @@ -263,7 +267,8 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define PKT_UPDATE 0x02 #define PKT_NOTIFICATION 0x03 #define PKT_KEEPALIVE 0x04 -#define PKT_ROUTE_REFRESH 0x05 +#define PKT_ROUTE_REFRESH 0x05 /* [RFC2918] */ +#define PKT_BEGIN_REFRESH 0x1e /* Dummy type for BoRR packet [RFC7313] */ #define PKT_SCHEDULE_CLOSE 0x1f /* Used internally to schedule socket close */ /* Attributes */ @@ -306,13 +311,13 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BS_MAX 7 /* BGP start states - * + * * Used in PS_START for fine-grained specification of starting state. * - * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP protocol - * done what is neccessary to start itself (like acquiring the lock), it goes to BSS_CONNECT. - * When some connection attempt failed because of option or capability error, it goes to - * BSS_CONNECT_NOCAP. + * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP + * protocol has done what is necessary to start itself (like acquiring the lock), + * it goes to BSS_CONNECT. When some connection attempt failed because of + * option or capability error, it goes to BSS_CONNECT_NOCAP. */ #define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. waiting for lock */ @@ -320,6 +325,33 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BSS_CONNECT 2 /* Ordinary BGP connecting */ #define BSS_CONNECT_NOCAP 3 /* Legacy BGP connecting (without capabilities) */ + +/* BGP feed states (TX) + * + * RFC 4724 specifies that an initial feed should end with End-of-RIB mark. + * + * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets. + * + * These states (stored in p->feed_state) are used to keep track of these + * requirements. When such feed is started, BFS_LOADING / BFS_REFRESHING is + * set. 
When it ends, BFS_LOADED / BFS_REFRESHED is set to schedule End-of-RIB + * or EoRR packet. When the packet is sent, the state returns to BFS_NONE. + * + * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load + * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR + * demarcation) is active, BFS_NONE is set. + * + * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX) + * with corresponding semantics (-, expecting End-of-RIB, expecting EoRR). + */ + +#define BFS_NONE 0 /* No feed or original non-demarcated feed */ +#define BFS_LOADING 1 /* Initial feed active, End-of-RIB planned */ +#define BFS_LOADED 2 /* Loading done, End-of-RIB marker scheduled */ +#define BFS_REFRESHING 3 /* Route refresh (introduced by BoRR) active */ +#define BFS_REFRESHED 4 /* Refresh done, EoRR packet scheduled */ + + /* Error classes */ #define BE_NONE 0 diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index d34e7c56..2d2a84b3 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -22,6 +22,12 @@ #include "bgp.h" + +#define BGP_RR_REQUEST 0 +#define BGP_RR_BEGIN 1 +#define BGP_RR_END 2 + + static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS; static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS; @@ -209,6 +215,15 @@ bgp_put_cap_add_path(struct bgp_proto *p, byte *buf) return buf; } +static byte * +bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf) +{ + *buf++ = 70; /* Capability 70: Support for enhanced route refresh */ + *buf++ = 0; /* Capability data length */ + return buf; +} + + static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) { @@ -256,6 +271,9 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) if (p->cf->add_path) cap = bgp_put_cap_add_path(p, cap); + if (p->cf->enable_refresh) + cap = bgp_put_cap_err(p, cap); + cap_len = cap - buf - 12; if (cap_len > 0) { @@ -389,7 +407,7 @@ static byte * bgp_create_end_mark(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; - 
BGP_TRACE(D_PACKETS, "Sending End-of-RIB"); + BGP_TRACE(D_PACKETS, "Sending END-OF-RIB"); put_u32(buf, 0); return buf+4; @@ -568,7 +586,7 @@ static byte * bgp_create_end_mark(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending End-of-RIB"); + BGP_TRACE(D_PACKETS, "Sending END-OF-RIB"); put_u16(buf+0, 0); put_u16(buf+2, 6); /* length 4-9 */ @@ -586,19 +604,49 @@ bgp_create_end_mark(struct bgp_conn *conn, byte *buf) #endif -static byte * +static inline byte * bgp_create_route_refresh(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH"); + /* Original route refresh request, RFC 2918 */ *buf++ = 0; *buf++ = BGP_AF; - *buf++ = 0; /* RFU */ - *buf++ = 1; /* and SAFI 1 */ + *buf++ = BGP_RR_REQUEST; + *buf++ = 1; /* SAFI */ return buf; } +static inline byte * +bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf) +{ + struct bgp_proto *p = conn->bgp; + BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR"); + + /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */ + *buf++ = 0; + *buf++ = BGP_AF; + *buf++ = BGP_RR_BEGIN; + *buf++ = 1; /* SAFI */ + return buf; +} + +static inline byte * +bgp_create_end_refresh(struct bgp_conn *conn, byte *buf) +{ + struct bgp_proto *p = conn->bgp; + BGP_TRACE(D_PACKETS, "Sending END-OF-RR"); + + /* Demarcation of ending of route refresh (EoRR), RFC 7313 */ + *buf++ = 0; + *buf++ = BGP_AF; + *buf++ = BGP_RR_END; + *buf++ = 1; /* SAFI */ + return buf; +} + + static void bgp_create_header(byte *buf, unsigned int len, unsigned int type) { @@ -666,24 +714,44 @@ bgp_fire_tx(struct bgp_conn *conn) type = PKT_ROUTE_REFRESH; end = bgp_create_route_refresh(conn, pkt); } + else if (s & (1 << PKT_BEGIN_REFRESH)) + { + s &= ~(1 << PKT_BEGIN_REFRESH); + type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */ + end = bgp_create_begin_refresh(conn, pkt); + } else if (s & (1 << PKT_UPDATE)) { - end = bgp_create_update(conn, pkt); type 
= PKT_UPDATE; + end = bgp_create_update(conn, pkt); if (!end) - { + { + /* No update to send, perhaps we need to send End-of-RIB or EoRR */ + conn->packets_to_send = 0; - if (!p->send_end_mark) + if (p->feed_state == BFS_LOADED) + { + type = PKT_UPDATE; + end = bgp_create_end_mark(conn, pkt); + } + + else if (p->feed_state == BFS_REFRESHED) + { + type = PKT_ROUTE_REFRESH; + end = bgp_create_end_refresh(conn, pkt); + } + + else /* Really nothing to send */ return 0; - p->send_end_mark = 0; - end = bgp_create_end_mark(conn, pkt); + p->feed_state = BFS_NONE; } } else return 0; + conn->packets_to_send = s; bgp_create_header(buf, end - buf, type); return sk_send(sk, end - buf); @@ -737,7 +805,7 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) { if (len < 2 || len < 2 + opt[1]) goto err; - + cl = opt[1]; switch (opt[0]) @@ -780,7 +848,12 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) conn->peer_add_path = opt[2+i+3]; if (conn->peer_add_path > ADD_PATH_FULL) goto err; + break; + case 70: /* Enhanced route refresh capability, RFC 7313 */ + if (cl != 0) + goto err; + conn->peer_enhanced_refresh_support = 1; break; /* We can safely ignore all other capabilities */ @@ -945,7 +1018,10 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) static inline void bgp_rx_end_mark(struct bgp_proto *p) { - BGP_TRACE(D_PACKETS, "Got End-of-RIB"); + BGP_TRACE(D_PACKETS, "Got END-OF-RIB"); + + if (p->load_state == BFS_LOADING) + p->load_state = BFS_NONE; if (p->p.gr_recovery) proto_graceful_restart_unlock(&p->p); @@ -1353,7 +1429,9 @@ static struct { { 6, 5, "Connection rejected" }, { 6, 6, "Other configuration change" }, { 6, 7, "Connection collision resolution" }, - { 6, 8, "Out of Resources" } + { 6, 8, "Out of Resources" }, + { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */ + { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */ }; /** @@ -1484,22 +1562,47 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, int 
len) { struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - if (conn->state != BS_ESTABLISHED) { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } if (!p->cf->enable_refresh) { bgp_error(conn, 1, 3, pkt+18, 1); return; } - if (len != (BGP_HEADER_LENGTH + 4)) + if (len < (BGP_HEADER_LENGTH + 4)) { bgp_error(conn, 1, 2, pkt+16, 2); return; } + if (len > (BGP_HEADER_LENGTH + 4)) + { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; } + /* FIXME - we ignore AFI/SAFI values, as we support just one value and even an error code for an invalid request is not defined */ - proto_request_feeding(&p->p); + /* RFC 7313 redefined reserved field as RR message subtype */ + uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST; + + switch (subtype) + { + case BGP_RR_REQUEST: + BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); + proto_request_feeding(&p->p); + break; + + case BGP_RR_BEGIN: + BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR"); + bgp_refresh_begin(p); + break; + + case BGP_RR_END: + BGP_TRACE(D_PACKETS, "Got END-OF-RR"); + bgp_refresh_end(p); + break; + + default: + log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring", + p->p.name, subtype); + break; + } } diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 78514cf5..0a223a4f 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -1023,7 +1023,7 @@ krt_reload_routes(struct proto *P) } static void -krt_feed_done(struct proto *P) +krt_feed_end(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; @@ -1056,7 +1056,7 @@ krt_init(struct proto_config *c) p->p.rt_notify = krt_rt_notify; p->p.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; - p->p.feed_done = krt_feed_done; + p->p.feed_end = krt_feed_end; p->p.make_tmp_attrs = krt_make_tmp_attrs; p->p.store_tmp_attrs = krt_store_tmp_attrs; p->p.rte_same = krt_rte_same;