BGP: Long-lived graceful restart
The patch implements long-lived graceful restart for BGP, namely draft-uttaro-idr-bgp-persistence-03.
This commit is contained in:
parent
318acb0f6c
commit
5bd734317c
10 changed files with 432 additions and 28 deletions
|
@ -2220,13 +2220,16 @@ using the following configuration parameters:
|
|||
immediately shut down. Note that this option cannot be used with
|
||||
multihop BGP. Default: enabled for direct BGP, disabled otherwise.
|
||||
|
||||
<tag><label id="bgp-bfd">bfd <M>switch</M></tag>
|
||||
<tag><label id="bgp-bfd">bfd <M>switch</M>|graceful</tag>
|
||||
BGP could use BFD protocol as an advisory mechanism for neighbor
|
||||
liveness and failure detection. If enabled, BIRD sets up a BFD session
|
||||
for the BGP neighbor and tracks its liveness by it. This has an
|
||||
advantage of an order of magnitude lower detection times in case of
|
||||
failure. Note that BFD protocol also has to be configured, see
|
||||
<ref id="bfd" name="BFD"> section for details. Default: disabled.
|
||||
failure. When a neighbor failure is detected, the BGP session is
|
||||
restarted. Optionally, it can be configured (by <cf/graceful/ argument)
|
||||
to trigger graceful restart instead of regular restart. Note that BFD
|
||||
protocol also has to be configured, see <ref id="bfd" name="BFD">
|
||||
section for details. Default: disabled.
|
||||
|
||||
<tag><label id="bgp-ttl-security">ttl security <m/switch/</tag>
|
||||
Use GTSM (<rfc id="5082"> - the generalized TTL security mechanism). GTSM
|
||||
|
@ -2348,6 +2351,25 @@ using the following configuration parameters:
|
|||
re-establish after a restart before deleting stale routes. Default:
|
||||
120 seconds.
|
||||
|
||||
<tag><label id="bgp-long-lived-graceful-restart">long lived graceful restart <m/switch/|aware</tag>
|
||||
The long-lived graceful restart is an extension of the traditional
|
||||
<ref id="bgp-graceful-restart" name="BGP graceful restart">, where stale
|
||||
routes are kept even after the <ref id="bgp-graceful-restart-time"
|
||||
name="restart time"> expires for additional long-lived stale time, but
|
||||
they are marked with the LLGR_STALE community, depreferenced, and
|
||||
withdrawn from routers not supporting LLGR. Like traditional BGP
|
||||
graceful restart, it has three states: disabled, aware (receiving-only),
|
||||
and enabled. Note that long-lived graceful restart requires at least
|
||||
aware level of traditional BGP graceful restart. Default: aware, unless
|
||||
graceful restart is disabled.
|
||||
|
||||
<tag><label id="bgp-long-lived-stale-time">long lived stale time <m/number/</tag>
|
||||
The long-lived stale time is announced in the BGP long-lived graceful
|
||||
restart capability and specifies how long the neighbor would keep stale
|
||||
routes depreferenced during long-lived graceful restart until either the
|
||||
session is re-established and synchronized or the stale time expires and
|
||||
routes are removed. Default: 3600 seconds.
|
||||
|
||||
<tag><label id="bgp-interpret-communities">interpret communities <m/switch/</tag>
|
||||
<rfc id="1997"> demands that BGP speaker should process well-known
|
||||
communities like no-export (65535, 65281) or no-advertise (65535,
|
||||
|
@ -2607,6 +2629,19 @@ be used in explicit configuration.
|
|||
configure restarting role per AFI/SAFI pair by this channel option.
|
||||
The option is ignored if graceful restart is disabled by protocol-wide
|
||||
option. Default: off in aware mode, on in full mode.
|
||||
|
||||
<tag><label id="bgp-long-lived-graceful-restart-c">long lived graceful restart <m/switch/</tag>
|
||||
BGP long-lived graceful restart is configured mainly by protocol-wide
|
||||
<ref id="bgp-long-lived-graceful-restart" name="options">, but the
|
||||
restarting role can be set per AFI/SAFI pair by this channel option.
|
||||
The option is ignored if long-lived graceful restart is disabled by
|
||||
protocol-wide option. Default: off in aware mode, on in full mode.
|
||||
|
||||
<tag><label id="bgp-long-lived-stale-time-c">long lived stale time <m/number/</tag>
|
||||
Like previous graceful restart channel options, this option allows you to
|
||||
set <ref id="bgp-long-lived-stale-time" name="long lived stale time">
|
||||
per AFI/SAFI pair instead of per protocol. Default: set by protocol-wide
|
||||
option.
|
||||
</descrip>
|
||||
|
||||
<sect1>Attributes
|
||||
|
@ -2761,7 +2796,6 @@ interfaces to be defined for them to work with.
|
|||
so the default time is set to a large value.
|
||||
|
||||
<tag><label id="device-iface">interface <m/pattern/ [, <m/.../]</tag>
|
||||
|
||||
By default, the Device protocol handles all interfaces without any
|
||||
configuration. Interface definitions allow to specify optional
|
||||
parameters for specific interfaces. See <ref id="proto-iface"
|
||||
|
|
|
@ -229,6 +229,7 @@ struct proto {
|
|||
int (*rte_better)(struct rte *, struct rte *);
|
||||
int (*rte_same)(struct rte *, struct rte *);
|
||||
int (*rte_mergable)(struct rte *, struct rte *);
|
||||
struct rte * (*rte_modify)(struct rte *, struct linpool *);
|
||||
void (*rte_insert)(struct network *, struct rte *);
|
||||
void (*rte_remove)(struct network *, struct rte *);
|
||||
|
||||
|
|
|
@ -231,6 +231,7 @@ typedef struct rte {
|
|||
#ifdef CONFIG_BGP
|
||||
struct {
|
||||
u8 suppressed; /* Used for deterministic MED comparison */
|
||||
s8 stale; /* Route is LLGR_STALE, -1 if unknown */
|
||||
} bgp;
|
||||
#endif
|
||||
#ifdef CONFIG_BABEL
|
||||
|
@ -254,6 +255,7 @@ typedef struct rte {
|
|||
#define REF_FILTERED 2 /* Route is rejected by import filter */
|
||||
#define REF_STALE 4 /* Route is stale in a refresh cycle */
|
||||
#define REF_DISCARD 8 /* Route is scheduled for discard */
|
||||
#define REF_MODIFY 16 /* Route is scheduled for modify */
|
||||
|
||||
/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
|
||||
static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); }
|
||||
|
@ -297,6 +299,7 @@ int rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter);
|
|||
rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent);
|
||||
void rt_refresh_begin(rtable *t, struct channel *c);
|
||||
void rt_refresh_end(rtable *t, struct channel *c);
|
||||
void rt_modify_stale(rtable *t, struct channel *c);
|
||||
void rt_schedule_prune(rtable *t);
|
||||
void rte_dump(rte *);
|
||||
void rte_free(rte *);
|
||||
|
|
|
@ -1437,6 +1437,28 @@ rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collec
|
|||
rte_update_unlock();
|
||||
}
|
||||
|
||||
/* Modify existing route by protocol hook, used for long-lived graceful restart */
|
||||
static inline void
|
||||
rte_modify(rte *old)
|
||||
{
|
||||
rte_update_lock();
|
||||
|
||||
rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
|
||||
if (new != old)
|
||||
{
|
||||
if (new)
|
||||
{
|
||||
if (!rta_is_cached(new->attrs))
|
||||
new->attrs = rta_lookup(new->attrs);
|
||||
new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
|
||||
}
|
||||
|
||||
rte_recalculate(old->sender, old->net, new, old->attrs->src);
|
||||
}
|
||||
|
||||
rte_update_unlock();
|
||||
}
|
||||
|
||||
/* Check rtable for best route to given net whether it would be exported to p */
|
||||
int
|
||||
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
|
||||
|
@ -1521,6 +1543,26 @@ rt_refresh_end(rtable *t, struct channel *c)
|
|||
rt_schedule_prune(t);
|
||||
}
|
||||
|
||||
void
|
||||
rt_modify_stale(rtable *t, struct channel *c)
|
||||
{
|
||||
int prune = 0;
|
||||
|
||||
FIB_WALK(&t->fib, net, n)
|
||||
{
|
||||
rte *e;
|
||||
for (e = n->routes; e; e = e->next)
|
||||
if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
|
||||
{
|
||||
e->flags |= REF_MODIFY;
|
||||
prune = 1;
|
||||
}
|
||||
}
|
||||
FIB_WALK_END;
|
||||
|
||||
if (prune)
|
||||
rt_schedule_prune(t);
|
||||
}
|
||||
|
||||
/**
|
||||
* rte_dump - dump a route
|
||||
|
@ -1712,6 +1754,7 @@ again:
|
|||
|
||||
rescan:
|
||||
for (e=n->routes; e; e=e->next)
|
||||
{
|
||||
if (e->sender->flush_active || (e->flags & REF_DISCARD))
|
||||
{
|
||||
if (limit <= 0)
|
||||
|
@ -1727,6 +1770,22 @@ again:
|
|||
goto rescan;
|
||||
}
|
||||
|
||||
if (e->flags & REF_MODIFY)
|
||||
{
|
||||
if (limit <= 0)
|
||||
{
|
||||
FIB_ITERATE_PUT(fit);
|
||||
ev_schedule(tab->rt_event);
|
||||
return;
|
||||
}
|
||||
|
||||
rte_modify(e);
|
||||
limit--;
|
||||
|
||||
goto rescan;
|
||||
}
|
||||
}
|
||||
|
||||
if (!n->routes) /* Orphaned FIB entry */
|
||||
{
|
||||
FIB_ITERATE_PUT(fit);
|
||||
|
|
|
@ -1413,6 +1413,10 @@ bgp_import_control(struct proto *P, rte **new, struct linpool *pool UNUSED)
|
|||
/* Do not export outside of AS (or confederation) */
|
||||
if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
|
||||
return -1;
|
||||
|
||||
/* Do not export LLGR_STALE routes to LLGR-ignorant peers */
|
||||
if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1580,6 +1584,19 @@ rte_resolvable(rte *rt)
|
|||
return rt->attrs->dest == RTD_UNICAST;
|
||||
}
|
||||
|
||||
static inline int
|
||||
rte_stale(rte *r)
|
||||
{
|
||||
if (r->u.bgp.stale < 0)
|
||||
{
|
||||
/* If staleness is unknown, compute and cache it */
|
||||
eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
|
||||
r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
|
||||
}
|
||||
|
||||
return r->u.bgp.stale;
|
||||
}
|
||||
|
||||
int
|
||||
bgp_rte_better(rte *new, rte *old)
|
||||
{
|
||||
|
@ -1604,6 +1621,14 @@ bgp_rte_better(rte *new, rte *old)
|
|||
if (n < o)
|
||||
return 0;
|
||||
|
||||
/* LLGR draft - depreference stale routes */
|
||||
n = rte_stale(new);
|
||||
o = rte_stale(old);
|
||||
if (n > o)
|
||||
return 0;
|
||||
if (n < o)
|
||||
return 1;
|
||||
|
||||
/* Start with local preferences */
|
||||
x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
|
||||
y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
|
||||
|
@ -1725,6 +1750,10 @@ bgp_rte_mergable(rte *pri, rte *sec)
|
|||
if (!rte_resolvable(sec))
|
||||
return 0;
|
||||
|
||||
/* LLGR draft - depreference stale routes */
|
||||
if (rte_stale(pri) != rte_stale(sec))
|
||||
return 0;
|
||||
|
||||
/* Start with local preferences */
|
||||
x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
|
||||
y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
|
||||
|
@ -1926,6 +1955,27 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
|
|||
return old_is_group_best;
|
||||
}
|
||||
|
||||
struct rte *
|
||||
bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
|
||||
{
|
||||
eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
|
||||
struct adata *ad = a ? a->u.ptr : NULL;
|
||||
uint flags = a ? a->flags : BAF_PARTIAL;
|
||||
|
||||
if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
|
||||
return NULL;
|
||||
|
||||
if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
|
||||
return r;
|
||||
|
||||
r = rte_cow_rta(r, pool);
|
||||
bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
|
||||
int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
|
||||
r->u.bgp.stale = 1;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
|
||||
|
@ -2011,6 +2061,9 @@ bgp_get_route_info(rte *e, byte *buf)
|
|||
if (e->u.bgp.suppressed)
|
||||
buf += bsprintf(buf, "-");
|
||||
|
||||
if (rte_stale(e))
|
||||
buf += bsprintf(buf, "s");
|
||||
|
||||
if (e->attrs->hostentry)
|
||||
{
|
||||
if (!rte_resolvable(e))
|
||||
|
|
162
proto/bgp/bgp.c
162
proto/bgp/bgp.c
|
@ -513,8 +513,8 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
|
|||
p->route_refresh = peer->route_refresh;
|
||||
p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
|
||||
|
||||
/* Whether we may handle possible GR of peer (it has some AF GR-able) */
|
||||
p->gr_ready = 0; /* Updated later */
|
||||
/* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
|
||||
p->gr_ready = p->llgr_ready = 0; /* Updated later */
|
||||
|
||||
/* Whether peer is ready to handle our GR recovery */
|
||||
int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
|
||||
|
@ -547,8 +547,15 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
|
|||
c->load_state = BFS_NONE;
|
||||
|
||||
/* Channels where peer may do GR */
|
||||
c->gr_ready = active && local->gr_aware && rem->gr_able;
|
||||
uint gr_ready = active && local->gr_aware && rem->gr_able;
|
||||
uint llgr_ready = active && local->llgr_aware && rem->llgr_able;
|
||||
|
||||
c->gr_ready = gr_ready || llgr_ready;
|
||||
p->gr_ready = p->gr_ready || c->gr_ready;
|
||||
p->llgr_ready = p->llgr_ready || llgr_ready;
|
||||
|
||||
/* Remember last LLGR stale time */
|
||||
c->stale_time = local->llgr_aware ? rem->llgr_time : 0;
|
||||
|
||||
/* Channels not able to recover gracefully */
|
||||
if (p->p.gr_recovery && (!active || !peer_gr_ready))
|
||||
|
@ -558,8 +565,14 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
|
|||
if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
|
||||
c->c.gr_wait = 1;
|
||||
|
||||
/* Channels where peer is not able to recover gracefully */
|
||||
if (c->gr_active && ! (c->gr_ready && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
|
||||
/* Channels where regular graceful restart failed */
|
||||
if ((c->gr_active == BGP_GRS_ACTIVE) &&
|
||||
!(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
|
||||
bgp_graceful_restart_done(c);
|
||||
|
||||
/* Channels where long-lived graceful restart failed */
|
||||
if ((c->gr_active == BGP_GRS_LLGR) &&
|
||||
!(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING)))
|
||||
bgp_graceful_restart_done(c);
|
||||
|
||||
/* GR capability implies that neighbor will send End-of-RIB */
|
||||
|
@ -669,12 +682,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
|
|||
|
||||
if (c->gr_ready)
|
||||
{
|
||||
if (c->gr_active)
|
||||
rt_refresh_end(c->c.table, &c->c);
|
||||
|
||||
c->gr_active = 1;
|
||||
p->gr_active_num++;
|
||||
|
||||
switch (c->gr_active)
|
||||
{
|
||||
case BGP_GRS_NONE:
|
||||
c->gr_active = BGP_GRS_ACTIVE;
|
||||
rt_refresh_begin(c->c.table, &c->c);
|
||||
break;
|
||||
|
||||
case BGP_GRS_ACTIVE:
|
||||
rt_refresh_end(c->c.table, &c->c);
|
||||
rt_refresh_begin(c->c.table, &c->c);
|
||||
break;
|
||||
|
||||
case BGP_GRS_LLGR:
|
||||
rt_refresh_begin(c->c.table, &c->c);
|
||||
rt_modify_stale(c->c.table, &c->c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -695,7 +721,7 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
|
|||
ASSERT(p->gr_active_num > 0);
|
||||
|
||||
proto_notify_state(&p->p, PS_START);
|
||||
bgp_start_timer(p->gr_timer, p->conn->remote_caps->gr_time);
|
||||
tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -720,6 +746,7 @@ bgp_graceful_restart_done(struct bgp_channel *c)
|
|||
if (!p->gr_active_num)
|
||||
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
|
||||
|
||||
tm_stop(c->stale_timer);
|
||||
rt_refresh_end(c->c.table, &c->c);
|
||||
}
|
||||
|
||||
|
@ -738,9 +765,48 @@ bgp_graceful_restart_timeout(timer *t)
|
|||
struct bgp_proto *p = t->data;
|
||||
|
||||
BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
|
||||
|
||||
if (p->llgr_ready)
|
||||
{
|
||||
struct bgp_channel *c;
|
||||
WALK_LIST(c, p->p.channels)
|
||||
{
|
||||
/* Channel is not in GR and is already flushed */
|
||||
if (!c->gr_active)
|
||||
continue;
|
||||
|
||||
/* Channel is already in LLGR from past restart */
|
||||
if (c->gr_active == BGP_GRS_LLGR)
|
||||
continue;
|
||||
|
||||
/* Channel is in GR, but does not support LLGR -> stop GR */
|
||||
if (!c->stale_time)
|
||||
{
|
||||
bgp_graceful_restart_done(c);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Channel is in GR, and supports LLGR -> start LLGR */
|
||||
c->gr_active = BGP_GRS_LLGR;
|
||||
tm_start(c->stale_timer, c->stale_time S);
|
||||
rt_modify_stale(c->c.table, &c->c);
|
||||
}
|
||||
}
|
||||
else
|
||||
bgp_stop(p, 0, NULL, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
bgp_long_lived_stale_timeout(timer *t)
|
||||
{
|
||||
struct bgp_channel *c = t->data;
|
||||
struct bgp_proto *p = (void *) c->c.proto;
|
||||
|
||||
BGP_TRACE(D_EVENTS, "Long-lived stale timeout");
|
||||
|
||||
bgp_graceful_restart_done(c);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* bgp_refresh_begin - start incoming enhanced route refresh sequence
|
||||
|
@ -873,6 +939,12 @@ bgp_hold_timeout(timer *t)
|
|||
|
||||
if (sk_rx_ready(conn->sk) > 0)
|
||||
bgp_start_timer(conn->hold_timer, 10);
|
||||
else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
|
||||
{
|
||||
BGP_TRACE(D_EVENTS, "Hold timer expired");
|
||||
bgp_handle_graceful_restart(p);
|
||||
bgp_conn_enter_idle_state(conn);
|
||||
}
|
||||
else
|
||||
bgp_error(conn, 4, 0, NULL, 0);
|
||||
}
|
||||
|
@ -1172,10 +1244,27 @@ bgp_bfd_notify(struct bfd_request *req)
|
|||
{
|
||||
BGP_TRACE(D_EVENTS, "BFD session down");
|
||||
bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
|
||||
|
||||
if (p->cf->bfd == BGP_BFD_GRACEFUL)
|
||||
{
|
||||
/* Trigger graceful restart */
|
||||
if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
|
||||
bgp_handle_graceful_restart(p);
|
||||
|
||||
if (p->incoming_conn.state > BS_IDLE)
|
||||
bgp_conn_enter_idle_state(&p->incoming_conn);
|
||||
|
||||
if (p->outgoing_conn.state > BS_IDLE)
|
||||
bgp_conn_enter_idle_state(&p->outgoing_conn);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Trigger session down */
|
||||
if (ps == PS_UP)
|
||||
bgp_update_startup_delay(p);
|
||||
bgp_stop(p, 0, NULL, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1447,6 +1536,7 @@ bgp_init(struct proto_config *CF)
|
|||
P->rte_better = bgp_rte_better;
|
||||
P->rte_mergable = bgp_rte_mergable;
|
||||
P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
|
||||
P->rte_modify = bgp_rte_modify_stale;
|
||||
|
||||
p->cf = cf;
|
||||
p->local_as = cf->local_as;
|
||||
|
@ -1503,6 +1593,8 @@ bgp_channel_start(struct channel *C)
|
|||
bgp_init_bucket_table(c);
|
||||
bgp_init_prefix_table(c);
|
||||
|
||||
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
|
||||
|
||||
c->next_hop_addr = c->cf->next_hop_addr;
|
||||
c->link_addr = IPA_NONE;
|
||||
c->packets_to_send = 0;
|
||||
|
@ -1634,6 +1726,10 @@ bgp_postconfig(struct proto_config *CF)
|
|||
if (cf->multihop < 0)
|
||||
cf->multihop = internal ? 64 : 0;
|
||||
|
||||
/* LLGR mode default based on GR mode */
|
||||
if (cf->llgr_mode < 0)
|
||||
cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0;
|
||||
|
||||
/* Link check for single-hop BGP by default */
|
||||
if (cf->check_link < 0)
|
||||
cf->check_link = !cf->multihop;
|
||||
|
@ -1676,6 +1772,9 @@ bgp_postconfig(struct proto_config *CF)
|
|||
if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
|
||||
cf_error("Multihop BGP with BFD requires specified local address");
|
||||
|
||||
if (!cf->gr_mode && cf->llgr_mode)
|
||||
cf_error("Long-lived graceful restart requires basic graceful restart");
|
||||
|
||||
|
||||
struct bgp_channel_config *cc;
|
||||
WALK_LIST(cc, CF->channels)
|
||||
|
@ -1706,10 +1805,16 @@ bgp_postconfig(struct proto_config *CF)
|
|||
if (!cc->gw_mode)
|
||||
cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
|
||||
|
||||
/* Default based on proto config */
|
||||
/* Defaults based on proto config */
|
||||
if (cc->gr_able == 0xff)
|
||||
cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
|
||||
|
||||
if (cc->llgr_able == 0xff)
|
||||
cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE);
|
||||
|
||||
if (cc->llgr_time == ~0U)
|
||||
cc->llgr_time = cf->llgr_time;
|
||||
|
||||
/* Default values of IGP tables */
|
||||
if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
|
||||
{
|
||||
|
@ -1885,6 +1990,7 @@ static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "Ope
|
|||
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
|
||||
static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};
|
||||
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
|
||||
static char *bgp_gr_states[] = { "None", "Regular", "Long-lived"};
|
||||
|
||||
static const char *
|
||||
bgp_last_errmsg(struct bgp_proto *p)
|
||||
|
@ -1963,6 +2069,7 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
|
|||
uint any_gr_able = 0;
|
||||
uint any_add_path = 0;
|
||||
uint any_ext_next_hop = 0;
|
||||
uint any_llgr_able = 0;
|
||||
u32 *afl1 = alloca(caps->af_count * sizeof(u32));
|
||||
u32 *afl2 = alloca(caps->af_count * sizeof(u32));
|
||||
uint afn1, afn2;
|
||||
|
@ -1973,6 +2080,7 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
|
|||
any_gr_able |= ac->gr_able;
|
||||
any_add_path |= ac->add_path;
|
||||
any_ext_next_hop |= ac->ext_next_hop;
|
||||
any_llgr_able |= ac->llgr_able;
|
||||
}
|
||||
|
||||
if (any_mp_bgp)
|
||||
|
@ -2052,6 +2160,32 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
|
|||
|
||||
if (caps->enhanced_refresh)
|
||||
cli_msg(-1006, " Enhanced refresh");
|
||||
|
||||
if (caps->llgr_aware)
|
||||
cli_msg(-1006, " Long-lived graceful restart");
|
||||
|
||||
if (any_llgr_able)
|
||||
{
|
||||
u32 stale_time = 0;
|
||||
|
||||
afn1 = afn2 = 0;
|
||||
WALK_AF_CAPS(caps, ac)
|
||||
{
|
||||
stale_time = MAX(stale_time, ac->llgr_time);
|
||||
|
||||
if (ac->llgr_able && ac->llgr_time)
|
||||
afl1[afn1++] = ac->afi;
|
||||
|
||||
if (ac->llgr_flags & BGP_GRF_FORWARDING)
|
||||
afl2[afn2++] = ac->afi;
|
||||
}
|
||||
|
||||
/* Continues from llgr_aware */
|
||||
cli_msg(-1006, " LL stale time: %u", stale_time);
|
||||
|
||||
bgp_show_afis(-1006, " AF supported:", afl1, afn1);
|
||||
bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2118,6 +2252,12 @@ bgp_show_proto_info(struct proto *P)
|
|||
{
|
||||
channel_show_info(&c->c);
|
||||
|
||||
if (p->gr_active_num)
|
||||
cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
|
||||
|
||||
if (tm_active(c->stale_timer))
|
||||
cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer));
|
||||
|
||||
if (c->c.channel_state == CS_UP)
|
||||
{
|
||||
if (ipa_zero(c->link_addr))
|
||||
|
|
|
@ -107,9 +107,11 @@ struct bgp_config {
|
|||
int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
|
||||
int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */
|
||||
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
|
||||
int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */
|
||||
int setkey; /* Set MD5 password to system SA/SP database */
|
||||
/* Times below are in seconds */
|
||||
unsigned gr_time; /* Graceful restart timeout */
|
||||
unsigned llgr_time; /* Long-lived graceful restart stale time */
|
||||
unsigned connect_delay_time; /* Minimum delay between connect attempts */
|
||||
unsigned connect_retry_time; /* Timeout for connect attempts */
|
||||
unsigned hold_time, initial_hold_time;
|
||||
|
@ -138,6 +140,8 @@ struct bgp_channel_config {
|
|||
u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
|
||||
u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
|
||||
u8 gr_able; /* Allow full graceful restart for the channel */
|
||||
u8 llgr_able; /* Allow full long-lived GR for the channel */
|
||||
uint llgr_time; /* Long-lived graceful restart stale time */
|
||||
u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */
|
||||
u8 add_path; /* Use ADD-PATH extension [RFC 7911] */
|
||||
|
||||
|
@ -166,12 +170,26 @@ struct bgp_channel_config {
|
|||
/* For GR capability per-AF flags */
|
||||
#define BGP_GRF_FORWARDING 0x80
|
||||
|
||||
#define BGP_LLGR_ABLE 1
|
||||
#define BGP_LLGR_AWARE 2
|
||||
|
||||
#define BGP_LLGRF_FORWARDING 0x80
|
||||
|
||||
#define BGP_GRS_NONE 0 /* No GR */
|
||||
#define BGP_GRS_ACTIVE 1 /* Graceful restart per RFC 4724 */
|
||||
#define BGP_GRS_LLGR 2 /* Long-lived GR phase (stale timer active) */
|
||||
|
||||
#define BGP_BFD_GRACEFUL 2 /* BFD down triggers graceful restart */
|
||||
|
||||
|
||||
struct bgp_af_caps {
|
||||
u32 afi;
|
||||
u8 ready; /* Multiprotocol capability, RFC 4760 */
|
||||
u8 gr_able; /* Graceful restart support, RFC 4724 */
|
||||
u8 gr_af_flags; /* Graceful restart per-AF flags */
|
||||
u8 llgr_able; /* Long-lived GR, RFC draft */
|
||||
u32 llgr_time; /* Long-lived GR stale time */
|
||||
u8 llgr_flags; /* Long-lived GR per-AF flags */
|
||||
u8 ext_next_hop; /* Extended IPv6 next hop, RFC 5549 */
|
||||
u8 add_path; /* Multiple paths support, RFC 7911 */
|
||||
};
|
||||
|
@ -188,6 +206,8 @@ struct bgp_caps {
|
|||
u8 gr_flags; /* Graceful restart flags */
|
||||
u16 gr_time; /* Graceful restart time in seconds */
|
||||
|
||||
u8 llgr_aware; /* Long-lived GR capability, RFC draft */
|
||||
|
||||
u16 af_count; /* Number of af_data items */
|
||||
|
||||
struct bgp_af_caps af_data[0]; /* Per-AF capability data */
|
||||
|
@ -243,6 +263,7 @@ struct bgp_proto {
|
|||
u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */
|
||||
u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */
|
||||
u8 gr_ready; /* Neighbor could do graceful restart */
|
||||
u8 llgr_ready; /* Neighbor could do Long-lived GR, implies gr_ready */
|
||||
u8 gr_active_num; /* Neighbor is doing GR, number of active channels */
|
||||
u8 channel_count; /* Number of active channels */
|
||||
u32 *afi_map; /* Map channel index -> AFI */
|
||||
|
@ -291,11 +312,14 @@ struct bgp_channel {
|
|||
|
||||
u32 packets_to_send; /* Bitmap of packet types to be sent */
|
||||
|
||||
u8 gr_ready; /* Neighbor could do GR on this AF */
|
||||
u8 gr_active; /* Neighbor is doing GR and keeping fwd state */
|
||||
|
||||
u8 ext_next_hop; /* Session allows both IPv4 and IPv6 next hops */
|
||||
|
||||
u8 gr_ready; /* Neighbor could do GR on this AF */
|
||||
u8 gr_active; /* Neighbor is doing GR (BGP_GRS_*) */
|
||||
|
||||
timer *stale_timer; /* Long-lived stale timer for LLGR */
|
||||
u32 stale_time; /* Stored LLGR stale time from last session */
|
||||
|
||||
u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */
|
||||
u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */
|
||||
|
||||
|
@ -505,6 +529,7 @@ void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp);
|
|||
int bgp_rte_better(struct rte *, struct rte *);
|
||||
int bgp_rte_mergable(rte *pri, rte *sec);
|
||||
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
|
||||
struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool);
|
||||
void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old);
|
||||
int bgp_import_control(struct proto *, struct rte **, struct linpool *);
|
||||
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
|
||||
|
@ -645,6 +670,9 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
|
|||
#define BGP_COMM_NO_ADVERTISE 0xffffff02 /* Don't export at all */
|
||||
#define BGP_COMM_NO_EXPORT_SUBCONFED 0xffffff03 /* NO_EXPORT even in local confederation */
|
||||
|
||||
#define BGP_COMM_LLGR_STALE 0xffff0006 /* Route is stale according to LLGR */
|
||||
#define BGP_COMM_NO_LLGR 0xffff0007 /* Do not treat the route according to LLGR */
|
||||
|
||||
/* Origins */
|
||||
|
||||
#define ORIGIN_IGP 0
|
||||
|
|
|
@ -28,7 +28,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
|
|||
BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
|
||||
SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
|
||||
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
|
||||
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6)
|
||||
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
|
||||
LIVED, STALE)
|
||||
|
||||
%type <i32> bgp_afi
|
||||
|
||||
|
@ -63,6 +64,8 @@ bgp_proto_start: proto_start BGP {
|
|||
BGP_CFG->default_local_pref = 100;
|
||||
BGP_CFG->gr_mode = BGP_GR_AWARE;
|
||||
BGP_CFG->gr_time = 120;
|
||||
BGP_CFG->llgr_mode = -1;
|
||||
BGP_CFG->llgr_time = 3600;
|
||||
BGP_CFG->setkey = 1;
|
||||
BGP_CFG->check_link = -1;
|
||||
}
|
||||
|
@ -161,9 +164,13 @@ bgp_proto:
|
|||
| bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; }
|
||||
| bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; }
|
||||
| bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; }
|
||||
| bgp_proto LONG LIVED GRACEFUL RESTART bool ';' { BGP_CFG->llgr_mode = $6; }
|
||||
| bgp_proto LONG LIVED GRACEFUL RESTART AWARE ';' { BGP_CFG->llgr_mode = BGP_LLGR_AWARE; }
|
||||
| bgp_proto LONG LIVED STALE TIME expr ';' { BGP_CFG->llgr_time = $6; }
|
||||
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
|
||||
| bgp_proto CHECK LINK bool ';' { BGP_CFG->check_link = $4; }
|
||||
| bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); }
|
||||
| bgp_proto BFD GRACEFUL ';' { BGP_CFG->bfd = BGP_BFD_GRACEFUL; cf_check_bfd(1); }
|
||||
;
|
||||
|
||||
bgp_afi:
|
||||
|
@ -199,6 +206,8 @@ bgp_channel_start: bgp_afi
|
|||
BGP_CC->afi = $1;
|
||||
BGP_CC->desc = desc;
|
||||
BGP_CC->gr_able = 0xff; /* undefined */
|
||||
BGP_CC->llgr_able = 0xff; /* undefined */
|
||||
BGP_CC->llgr_time = ~0U; /* undefined */
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -214,6 +223,8 @@ bgp_channel_item:
|
|||
| GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; }
|
||||
| SECONDARY bool { BGP_CC->secondary = $2; }
|
||||
| GRACEFUL RESTART bool { BGP_CC->gr_able = $3; }
|
||||
| LONG LIVED GRACEFUL RESTART bool { BGP_CC->llgr_able = $5; }
|
||||
| LONG LIVED STALE TIME expr { BGP_CC->llgr_time = $5; }
|
||||
| EXTENDED NEXT HOP bool { BGP_CC->ext_next_hop = $4; }
|
||||
| ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; }
|
||||
| ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; }
|
||||
|
|
|
@ -260,6 +260,9 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
|
|||
caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
|
||||
}
|
||||
|
||||
if (p->cf->llgr_mode)
|
||||
caps->llgr_aware = 1;
|
||||
|
||||
/* Allocate and fill per-AF fields */
|
||||
WALK_LIST(c, p->p.channels)
|
||||
{
|
||||
|
@ -280,6 +283,15 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
|
|||
if (p->p.gr_recovery)
|
||||
ac->gr_af_flags |= BGP_GRF_FORWARDING;
|
||||
}
|
||||
|
||||
if (c->cf->llgr_able)
|
||||
{
|
||||
ac->llgr_able = 1;
|
||||
ac->llgr_time = c->cf->llgr_time;
|
||||
|
||||
if (p->p.gr_recovery)
|
||||
ac->llgr_flags |= BGP_LLGRF_FORWARDING;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort capability fields by AFI/SAFI */
|
||||
|
@ -289,9 +301,9 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
|
|||
/* Create capability list in buffer */
|
||||
|
||||
/*
|
||||
* Note that max length is ~ 20+14*af_count. With max 12 channels that is
|
||||
* 188. Option limit is 253 and buffer size is 4096, so we cannot overflow
|
||||
* unless we add new capabilities or more AFs.
|
||||
* Note that max length is ~ 22+21*af_count. With max 12 channels that is
|
||||
* 274. Option limit is 253 and buffer size is 4096, so we cannot overflow
|
||||
* unless we add new capabilities or more AFs. XXXXX
|
||||
*/
|
||||
|
||||
WALK_AF_CAPS(caps, ac)
|
||||
|
@ -384,6 +396,24 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
|
|||
*buf++ = 0; /* Capability data length */
|
||||
}
|
||||
|
||||
if (caps->llgr_aware)
|
||||
{
|
||||
*buf++ = 71; /* Capability 71: Support for long-lived graceful restart */
|
||||
*buf++ = 0; /* Capability data length, will be fixed later */
|
||||
data = buf;
|
||||
|
||||
WALK_AF_CAPS(caps, ac)
|
||||
if (ac->llgr_able)
|
||||
{
|
||||
put_af3(buf, ac->afi);
|
||||
buf[3] = ac->llgr_flags;
|
||||
put_u24(buf+4, ac->llgr_time);
|
||||
buf += 7;
|
||||
}
|
||||
|
||||
data[-1] = buf - data;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -508,11 +538,49 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i
|
|||
caps->enhanced_refresh = 1;
|
||||
break;
|
||||
|
||||
case 71: /* Long lived graceful restart capability, RFC draft */
|
||||
if (cl % 7)
|
||||
goto err;
|
||||
|
||||
/* Presumably, only the last instance is valid */
|
||||
WALK_AF_CAPS(caps, ac)
|
||||
{
|
||||
ac->llgr_able = 0;
|
||||
ac->llgr_flags = 0;
|
||||
ac->llgr_time = 0;
|
||||
}
|
||||
|
||||
caps->llgr_aware = 1;
|
||||
|
||||
for (i = 0; i < cl; i += 7)
|
||||
{
|
||||
af = get_af3(pos+2+i);
|
||||
ac = bgp_get_af_caps(caps, af);
|
||||
ac->llgr_able = 1;
|
||||
ac->llgr_flags = pos[2+i+3];
|
||||
ac->llgr_time = get_u24(pos + 2+i+4);
|
||||
}
|
||||
break;
|
||||
|
||||
/* We can safely ignore all other capabilities */
|
||||
}
|
||||
|
||||
ADVANCE(pos, len, 2 + cl);
|
||||
}
|
||||
|
||||
/* The LLGR capability must be advertised together with the GR capability,
|
||||
otherwise it must be disregarded */
|
||||
if (!caps->gr_aware && caps->llgr_aware)
|
||||
{
|
||||
caps->llgr_aware = 0;
|
||||
WALK_AF_CAPS(caps, ac)
|
||||
{
|
||||
ac->llgr_able = 0;
|
||||
ac->llgr_flags = 0;
|
||||
ac->llgr_time = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
err:
|
||||
|
@ -1131,6 +1199,7 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
|
|||
|
||||
e->pflags = 0;
|
||||
e->u.bgp.suppressed = 0;
|
||||
e->u.bgp.stale = -1;
|
||||
rte_update2(&s->channel->c, n, e, s->last_src);
|
||||
}
|
||||
|
||||
|
|
|
@ -78,6 +78,12 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *o
|
|||
e->pref = new->pref;
|
||||
e->pflags = new->pflags;
|
||||
|
||||
#ifdef CONFIG_BGP
|
||||
/* Hack to cleanup cached value */
|
||||
if (e->attrs->src->proto->proto == &proto_bgp)
|
||||
e->u.bgp.stale = -1;
|
||||
#endif
|
||||
|
||||
src = a->src;
|
||||
}
|
||||
else
|
||||
|
|
Loading…
Reference in a new issue