Merge branch 'master' into mq-filter-stack

This commit is contained in:
Maria Matejka 2019-07-03 11:09:52 +02:00
commit eac9250fd5
33 changed files with 1228 additions and 144 deletions

7
.dir-locals.el Normal file
View file

@ -0,0 +1,7 @@
; BIRD project coding conventions
;
; Directory-local variables applied by Emacs to buffers in c-mode.
((c-mode . ((c-file-style . "bsd")
            (c-basic-offset . 2)
            (fill-column . 80)
            (show-trailing-whitespace . t))))

View file

@ -88,7 +88,7 @@ HASH_DEFINE_REHASH_FN(SYM, struct symbol)
HASH(struct keyword) kw_hash;
static struct sym_scope *conf_this_scope;
struct sym_scope *conf_this_scope;
linpool *cfg_mem;
@ -719,7 +719,8 @@ cf_lex_init(int is_cli, struct config *c)
else
BEGIN(INITIAL);
conf_this_scope = cfg_allocz(sizeof(struct sym_scope));
c->root_scope = cfg_allocz(sizeof(struct sym_scope));
conf_this_scope = c->root_scope;
conf_this_scope->active = 1;
}

View file

@ -447,6 +447,24 @@ config_undo(void)
return CONF_PROGRESS;
}
/**
 * config_status - report the current configuration state
 *
 * Returns CONF_SHUTDOWN when shutting_down is set. Otherwise, while
 * configuring is set, returns CONF_QUEUED if future_cftype is nonzero and
 * CONF_PROGRESS if it is zero. Returns CONF_DONE in every other case.
 */
int
config_status(void)
{
  if (shutting_down)
    return CONF_SHUTDOWN;

  if (!configuring)
    return CONF_DONE;

  if (future_cftype)
    return CONF_QUEUED;

  return CONF_PROGRESS;
}
/**
 * config_timer_status - remaining time of config_timer
 *
 * Returns tm_remains(config_timer) while tm_active() reports the timer as
 * active, and -1 otherwise.
 */
btime
config_timer_status(void)
{
  if (!tm_active(config_timer))
    return -1;

  return tm_remains(config_timer);
}
extern void cmd_reconfig_undo_notify(void);
static void
@ -477,19 +495,24 @@ config_init(void)
* for switching to an empty configuration.
*/
void
order_shutdown(void)
order_shutdown(int gr)
{
struct config *c;
if (shutting_down)
return;
log(L_INFO "Shutting down");
if (!gr)
log(L_INFO "Shutting down");
else
log(L_INFO "Shutting down for graceful restart");
c = lp_alloc(config->mem, sizeof(struct config));
memcpy(c, config, sizeof(struct config));
init_list(&c->protos);
init_list(&c->tables);
c->shutdown = 1;
c->gr_down = gr;
config_commit(c, RECONFIG_HARD, 0);
shutting_down = 1;

View file

@ -53,8 +53,10 @@ struct config {
int file_fd; /* File descriptor of main configuration file */
HASH(struct symbol) sym_hash; /* Lexer: symbol hash table */
struct config *fallback; /* Link to regular config for CLI parsing */
struct sym_scope *root_scope; /* Scope for root symbols */
int obstacle_count; /* Number of items blocking freeing of this config */
int shutdown; /* This is a pseudo-config for daemon shutdown */
int gr_down; /* This is a pseudo-config for graceful restart */
btime load_time; /* When we've got this configuration */
};
@ -69,11 +71,13 @@ void config_free(struct config *);
int config_commit(struct config *, int type, uint timeout);
int config_confirm(void);
int config_undo(void);
int config_status(void);
btime config_timer_status(void);
void config_init(void);
void cf_error(const char *msg, ...) NORET;
void config_add_obstacle(struct config *);
void config_del_obstacle(struct config *);
void order_shutdown(void);
void order_shutdown(int gr);
#define RECONFIG_NONE 0
#define RECONFIG_HARD 1
@ -167,6 +171,8 @@ struct include_file_stack {
extern struct include_file_stack *ifs;
extern struct sym_scope *conf_this_scope;
int cf_lex(void);
void cf_lex_init(int is_cli, struct config *c);
void cf_lex_unwind(void);

View file

@ -561,7 +561,7 @@ include "tablename.conf";;
can be seen (together with other symbols) using 'show symbols' command.
<tag><label id="opt-attribute">attribute <m/type/ <m/name/</tag>
Define a custom route attribute. You can set and get it in filters like
Declare a custom route attribute. You can set and get it in filters like
any other route attribute. This feature is intended for marking routes
in import filters for export filtering purposes instead of locally
assigned BGP communities which have to be deleted in export filters.
@ -1159,7 +1159,7 @@ int var;
<p>As you can see, a filter has a header, a list of local variables, and a body.
The header consists of the <cf/filter/ keyword followed by a (unique) name of
filter. The list of local variables consists of <cf><M>type name</M>;</cf>
pairs where each pair defines one local variable. The body consists of <cf>
pairs where each pair declares one local variable. The body consists of <cf>
{ <M>statements</M> }</cf>. Each <m/statement/ is terminated by a <cf/;/. You
can group several statements to a single compound statement by using braces
(<cf>{ <M>statements</M> }</cf>) which is useful if you want to make a bigger
@ -1188,7 +1188,7 @@ called like in C: <cf>name(); with_parameters(5);</cf>. Function may return
values using the <cf>return <m/[expr]/</cf> command. Returning a value exits
from current function (this is similar to C).
<p>Filters are declared in a way similar to functions except they can't have
<p>Filters are defined in a way similar to functions except they can't have
explicit parameters. They get a route table entry as an implicit parameter, it
is also passed automatically to any functions called. The filter must terminate
with either <cf/accept/ or <cf/reject/ statement. If there's a runtime error in
@ -1571,11 +1571,20 @@ if 1234 = i then printn "."; else {
<label id="route-attributes">
<p>A filter is implicitly passed a route, and it can access its attributes just
like it accesses variables. Attempts to access undefined attribute result in a
runtime error; you can check if an attribute is defined by using the
<cf>defined( <m>attribute</m> )</cf> operator. One notable exception to this
rule are attributes of bgppath and *clist types, where undefined value is
regarded as empty bgppath/*clist for most purposes.
like it accesses variables. There are common route attributes, protocol-specific
route attributes and custom route attributes. Most common attributes are
mandatory (always defined), while the remaining ones are optional. Attempts to
access an undefined attribute result in a runtime error; you can check if an attribute is
defined by using the <cf>defined( <m>attribute</m> )</cf> operator. One notable
exception to this rule are attributes of bgppath and *clist types, where
undefined value is regarded as empty bgppath/*clist for most purposes.
Attributes can be defined by just setting them in filters. Custom attributes
have to be first declared by <ref id="opt-attribute" name="attribute"> global
option. You can also undefine an optional attribute back to non-existence by using
the <cf>unset( <m/attribute/ )</cf> operator.
Common route attributes are:
<descrip>
<tag><label id="rta-net"><m/prefix/ net</tag>
@ -1642,8 +1651,8 @@ regarded as empty bgppath/*clist for most purposes.
compare internal distances to boundary routers (see below).
</descrip>
<p>There also exist protocol-specific attributes which are described in the
corresponding protocol sections.
<p>Protocol-specific route attributes are described in the corresponding
protocol sections.
<sect>Other statements
@ -1653,7 +1662,7 @@ corresponding protocol sections.
<descrip>
<tag><label id="assignment"><m/variable/ = <m/expr/</tag>
Set variable to a given value.
Set variable (or route attribute) to a given value.
<tag><label id="filter-accept-reject">accept|reject [ <m/expr/ ]</tag>
Accept or reject the route, possibly printing <cf><m>expr</m></cf>.
@ -2186,12 +2195,23 @@ using the following configuration parameters:
<cf/local 10.0.0.1; local as 65000;/ are valid). This parameter is
mandatory.
<tag><label id="bgp-neighbor">neighbor [<m/ip/] [port <m/number/] [as <m/number/]</tag>
<tag><label id="bgp-neighbor">neighbor [<m/ip/ | range <m/prefix/] [port <m/number/] [as <m/number/] [internal|external]</tag>
Define neighboring router this instance will be talking to and what AS
it is located in. In case the neighbor is in the same AS as we are, we
automatically switch to iBGP. Optionally, the remote port may also be
specified. Like <cf/local/ parameter, this parameter may also be used
multiple times with different sub-options. This parameter is mandatory.
automatically switch to IBGP. Alternatively, it is possible to specify
just <cf/internal/ or <cf/external/ instead of AS number, in that case
either local AS number, or any external AS number is accepted.
Optionally, the remote port may also be specified. Like <cf/local/
parameter, this parameter may also be used multiple times with different
sub-options. This parameter is mandatory.
It is possible to specify network prefix (with <cf/range/ keyword)
instead of explicit neighbor IP address. This enables dynamic BGP
behavior, where the BGP instance listens on BGP port, but new BGP
instances are spawned for incoming BGP connections (if source address
matches the network prefix). It is possible to mix regular BGP instances
with dynamic BGP instances and have multiple dynamic BGP instances with
different ranges.
<tag><label id="bgp-iface">interface <m/string/</tag>
Define interface we should use for link-local BGP IPv6 sessions.
@ -2224,6 +2244,16 @@ using the following configuration parameters:
session. Default: the address of the local end of the interface our
neighbor is connected to.
<tag><label id="bgp-dynamic-name">dynamic name "<m/text/"</tag>
Define common prefix of names used for new BGP instances spawned when
dynamic BGP behavior is active. Actual names also contain numeric
index to distinguish individual instances. Default: "dynbgp".
<tag><label id="bgp-dynamic-name-digits">dynamic name digits <m/number/</tag>
Define minimum number of digits for index in names of spawned dynamic
BGP instances. E.g., if set to 2, then the first name would be
"dynbgp01". Default: 0.
<tag><label id="bgp-strict-bind">strict bind <m/switch/</tag>
Specify whether BGP listening socket should be bound to a specific local
address (the same as the <cf/source address/) and associated interface,
@ -2565,6 +2595,15 @@ be used in explicit configuration.
<p>BGP channels have additional config options (together with the common ones):
<descrip>
<tag><label id="bgp-mandatory">mandatory <m/switch/</tag>
When local and neighbor sets of configured AFI/SAFI pairs differ,
capability negotiation ensures that a common subset is used. For
mandatory channels their associated AFI/SAFI must be negotiated
(i.e., also announced by the neighbor), otherwise BGP session
negotiation fails with <it/'Required capability missing'/ error.
Regardless, at least one AFI/SAFI must be negotiated in order for the BGP
session to be successfully established. Default: off.
<tag><label id="bgp-next-hop-keep">next hop keep <m/switch/|ibgp|ebgp</tag>
Do not modify the Next Hop attribute and advertise the current one
unchanged even in cases where our own local address should be used
@ -3230,6 +3269,8 @@ protocol ospf [v2|v3] &lt;name&gt; {
tick &lt;num&gt;;
ecmp &lt;switch&gt; [limit &lt;num&gt;];
merge external &lt;switch&gt;;
graceful restart &lt;switch&gt;|aware;
graceful restart time &lt;num&gt;;
area &lt;id&gt; {
stub;
nssa;
@ -3373,6 +3414,31 @@ protocol ospf [v2|v3] &lt;name&gt; {
from different LSAs are treated as separate even if they represent the
same destination. Default value is no.
<tag><label id="ospf-graceful-restart">graceful restart <m/switch/|aware</tag>
When an OSPF instance is restarted, neighbors break adjacencies and
recalculate their routing tables, which disrupts packet forwarding even
when the forwarding plane of the restarting router remains intact.
<rfc id="3623"> specifies a graceful restart mechanism to alleviate this
issue. For OSPF graceful restart, restarting router originates
Grace-LSAs, announcing intent to do graceful restart. Neighbors
receiving these LSAs enter helper mode, in which they ignore breakdown
of adjacencies, behave as if nothing is happening and keep old routes.
When adjacencies are reestablished, the restarting router flushes
Grace-LSAs and graceful restart is ended.
This option controls the graceful restart mechanism. It has three
states: Disabled, when no support is provided. Aware, when graceful
restart helper mode is supported, but no local graceful restart is
allowed (i.e. helper-only role). Enabled, when the full graceful restart
support is provided (i.e. both restarting and helper role). Note that
proper support for local graceful restart requires also configuration of
other protocols. Default: aware.
<tag><label id="ospf-graceful-restart-time">graceful restart time <m/num/</tag>
The restart time is announced in the Grace-LSA and specifies how long
neighbors should wait for proper end of the graceful restart before
exiting helper mode prematurely. Default: 120 seconds.
<tag><label id="ospf-area">area <M>id</M></tag>
This defines an OSPF area with given area ID (an integer or an IPv4
address, similarly to a router ID). The most important area is the

View file

@ -33,6 +33,7 @@ Reply codes of BIRD command-line interface
0022 Undo scheduled
0023 Evaluation of expression
0024 Graceful restart status report
0025 Graceful restart ordered
1000 BIRD version
1001 Interface list

View file

@ -874,6 +874,28 @@ proto_copy_config(struct proto_config *dest, struct proto_config *src)
dest->protocol->copy_config(dest, src);
}
/**
 * proto_clone_config - create a protocol config as a clone of @parent
 * @sym: symbol that will refer to the new config
 * @parent: protocol config to copy from
 *
 * Allocates a new SYM_PROTO config for the parent's protocol, copies the
 * parent config into it using proto_copy_config(), then sets the clone's
 * name (taken from @sym), clears its proto pointer and records @parent as
 * its parent. Finally, @sym is updated to carry the clone's class and to
 * point to the clone.
 */
void
proto_clone_config(struct symbol *sym, struct proto_config *parent)
{
  struct proto_config *clone = proto_config_new(parent->protocol, SYM_PROTO);

  proto_copy_config(clone, parent);

  /* Clone-specific fields are assigned after the copy */
  clone->parent = parent;
  clone->proto = NULL;
  clone->name = sym->name;

  /* Bind the symbol to the freshly created config */
  sym->proto = clone;
  sym->class = clone->class;
}
/*
 * Undo a cloned protocol config: reset @sym to SYM_VOID with no config
 * attached and unlink @cf (its node cf->n) from the list it is on.
 */
static void
proto_undef_clone(struct symbol *sym, struct proto_config *cf)
{
  sym->proto = NULL;
  sym->class = SYM_VOID;

  rem_node(&cf->n);
}
/**
* protos_preconfig - pre-configuration processing
* @c: new configuration
@ -973,6 +995,24 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty
{
p = oc->proto;
sym = cf_find_symbol(new, oc->name);
/* Handle dynamic protocols */
if (!sym && oc->parent && !new->shutdown)
{
struct symbol *parsym = cf_find_symbol(new, oc->parent->name);
if (parsym && parsym->class == SYM_PROTO)
{
/* This is hack, we would like to share config, but we need to copy it now */
new_config = new;
cfg_mem = new->mem;
conf_this_scope = new->root_scope;
sym = cf_get_symbol(oc->name);
proto_clone_config(sym, parsym->proto);
new_config = NULL;
cfg_mem = NULL;
}
}
if (sym && sym->class == SYM_PROTO && !new->shutdown)
{
/* Found match, let's check if we can smoothly switch to new configuration */
@ -984,6 +1024,12 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty
if (! force_reconfig && proto_reconfigure(p, oc, nc, type))
continue;
if (nc->parent)
{
proto_undef_clone(sym, nc);
goto remove;
}
/* Unsuccessful, we will restart it */
if (!p->disabled && !nc->disabled)
log(L_INFO "Restarting protocol %s", p->name);
@ -997,10 +1043,16 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty
}
else if (!new->shutdown)
{
remove:
log(L_INFO "Removing protocol %s", p->name);
p->down_code = PDC_CF_REMOVE;
p->cf_new = NULL;
}
else if (new->gr_down)
{
p->down_code = PDC_CMD_GR_DOWN;
p->cf_new = NULL;
}
else /* global shutdown */
{
p->down_code = PDC_CMD_SHUTDOWN;
@ -1105,6 +1157,15 @@ proto_rethink_goal(struct proto *p)
}
}
/**
 * proto_spawn - create a protocol instance from a config
 * @cf: protocol config passed to proto_init()
 * @disabled: value stored in the disabled field of the new instance
 *
 * The instance is initialized by proto_init() with TAIL(proto_list) as the
 * second argument, its disabled flag is set, and proto_rethink_goal() is
 * called on it. Returns the new instance.
 */
struct proto *
proto_spawn(struct proto_config *cf, uint disabled)
{
  struct proto *inst = proto_init(cf, TAIL(proto_list));

  inst->disabled = disabled;
  proto_rethink_goal(inst);

  return inst;
}
/**
* DOC: Graceful restart recovery

View file

@ -89,6 +89,7 @@ void protos_build(void);
void proto_build(struct protocol *);
void protos_preconfig(struct config *);
void protos_commit(struct config *new, struct config *old, int force_restart, int type);
struct proto * proto_spawn(struct proto_config *cf, uint disabled);
void protos_dump_all(void);
#define GA_UNKNOWN 0 /* Attribute not recognized */
@ -113,6 +114,7 @@ struct proto_config {
struct config *global; /* Global configuration data */
struct protocol *protocol; /* Protocol */
struct proto *proto; /* Instance we've created */
struct proto_config *parent; /* Parent proto_config for dynamic protocols */
char *name;
char *dsc;
int class; /* SYM_PROTO or SYM_TEMPLATE */
@ -255,6 +257,7 @@ struct proto_spec {
#define PDC_CMD_DISABLE 0x11 /* Result of disable command */
#define PDC_CMD_RESTART 0x12 /* Result of restart command */
#define PDC_CMD_SHUTDOWN 0x13 /* Result of global shutdown */
#define PDC_CMD_GR_DOWN 0x14 /* Result of global graceful restart */
#define PDC_RX_LIMIT_HIT 0x21 /* Route receive limit reached */
#define PDC_IN_LIMIT_HIT 0x22 /* Route import limit reached */
#define PDC_OUT_LIMIT_HIT 0x23 /* Route export limit reached */
@ -263,6 +266,7 @@ struct proto_spec {
void *proto_new(struct proto_config *);
void *proto_config_new(struct protocol *, int class);
void proto_copy_config(struct proto_config *dest, struct proto_config *src);
void proto_clone_config(struct symbol *sym, struct proto_config *parent);
void proto_set_message(struct proto *p, char *msg, int len);
void graceful_restart_recovery(void);

View file

@ -1302,7 +1302,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
#define PXH_PARAMS /8, *2, 2, 2, 8, 20
#define PXH_PARAMS /8, *2, 2, 2, 8, 24
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
@ -1730,7 +1730,7 @@ bgp_rte_better(rte *new, rte *old)
return 0;
/* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
}

View file

@ -129,6 +129,9 @@ static list bgp_sockets; /* Global list of listening sockets */
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn);
static void bgp_setup_sk(struct bgp_conn *conn, sock *s);
static void bgp_send_open(struct bgp_conn *conn);
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
@ -149,7 +152,7 @@ bgp_open(struct bgp_proto *p)
struct bgp_socket *bs = NULL;
struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
(ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6);
(p->ipv4 ? IPA_NONE4 : IPA_NONE6);
uint port = p->cf->local_port;
/* FIXME: Add some global init? */
@ -272,8 +275,17 @@ bgp_startup(struct bgp_proto *p)
BGP_TRACE(D_EVENTS, "Started");
p->start_state = BSS_CONNECT;
if (!p->cf->passive)
if (!p->passive)
bgp_active(p);
if (p->postponed_sk)
{
/* Apply postponed incoming connection */
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
bgp_send_open(&p->incoming_conn);
p->postponed_sk = NULL;
}
}
static void
@ -387,7 +399,7 @@ bgp_close_conn(struct bgp_conn *conn)
void
bgp_update_startup_delay(struct bgp_proto *p)
{
struct bgp_config *cf = p->cf;
const struct bgp_config *cf = p->cf;
DBG("BGP: Updating startup delay\n");
@ -410,7 +422,7 @@ bgp_update_startup_delay(struct bgp_proto *p)
}
static void
bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len)
{
switch (conn->state)
{
@ -426,7 +438,13 @@ bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint le
case BS_OPENSENT:
case BS_OPENCONFIRM:
case BS_ESTABLISHED:
bgp_error(conn, 6, subcode, data, len);
if (subcode < 0)
{
bgp_conn_enter_close_state(conn);
bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
}
else
bgp_error(conn, 6, subcode, data, len);
return;
default:
@ -456,7 +474,7 @@ bgp_decision(void *vp)
if ((p->p.proto_state == PS_START) &&
(p->outgoing_conn.state == BS_IDLE) &&
(p->incoming_conn.state != BS_OPENCONFIRM) &&
!p->cf->passive)
!p->passive)
bgp_active(p);
if ((p->p.proto_state == PS_STOP) &&
@ -465,8 +483,31 @@ bgp_decision(void *vp)
bgp_down(p);
}
/*
 * bgp_spawn - create a new BGP instance cloned from @pp for peer @remote_ip
 *
 * A symbol with a generated name is obtained from cf_default_name(), the
 * config of @pp is cloned onto it by proto_clone_config(), @remote_ip is
 * stored into the cloned config, and the instance is created (not disabled)
 * by proto_spawn(). Returns the new instance.
 */
static struct bgp_proto *
bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
{
struct symbol *sym;
char fmt[SYM_MAX_LEN];
/* Build the name format "<dynamic_name>%0<digits>d" handed to cf_default_name() */
/* NOTE(review): bsprintf() is given no buffer size here - confirm that
   dynamic_name is limited so the result fits into SYM_MAX_LEN bytes */
bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits);
/* This is hack, we would like to share config, but we need to copy it now */
/* Point the global config-parsing state at the running config for the calls below */
new_config = config;
cfg_mem = config->mem;
conf_this_scope = config->root_scope;
sym = cf_default_name(fmt, &(pp->dynamic_name_counter));
proto_clone_config(sym, pp->p.cf);
/* new_config and cfg_mem are cleared again; conf_this_scope is left as set above */
new_config = NULL;
cfg_mem = NULL;
/* Just pass remote_ip to bgp_init() */
((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
/* Second argument 0 is the 'disabled' flag of proto_spawn() */
return (void *) proto_spawn(sym->proto, 0);
}
void
bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
{
proto_notify_state(&p->p, PS_STOP);
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
@ -491,6 +532,7 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
}
static const struct bgp_af_caps dummy_af_caps = { };
static const struct bgp_af_caps basic_af_caps = { .ready = 1 };
void
bgp_conn_enter_established_state(struct bgp_conn *conn)
@ -503,8 +545,12 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
BGP_TRACE(D_EVENTS, "BGP session established");
/* For multi-hop BGP sessions */
if (ipa_zero(p->source_addr))
p->source_addr = conn->sk->saddr;
if (ipa_zero(p->local_ip))
p->local_ip = conn->sk->saddr;
/* For promiscuous sessions */
if (!p->remote_as)
p->remote_as = conn->received_as;
/* In case of LLv6 is not valid during BGP start */
if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6)
@ -541,6 +587,13 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
/* Use default if capabilities were not announced */
if (!local->length && (c->afi == BGP_AF_IPV4))
loc = &basic_af_caps;
if (!peer->length && (c->afi == BGP_AF_IPV4))
rem = &basic_af_caps;
/* Ignore AFIs that were not announced in multiprotocol capability */
if (!loc || !loc->ready)
loc = &dummy_af_caps;
@ -880,6 +933,7 @@ bgp_send_open(struct bgp_conn *conn)
conn->sk->rx_hook = bgp_rx;
conn->sk->tx_hook = bgp_tx;
tm_stop(conn->connect_timer);
bgp_prepare_capabilities(conn);
bgp_schedule_packet(conn, NULL, PKT_OPEN);
bgp_conn_set_state(conn, BS_OPENSENT);
bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
@ -1039,8 +1093,8 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
DBG("BGP: Connecting\n");
sock *s = sk_new(p->p.pool);
s->type = SK_TCP_ACTIVE;
s->saddr = p->source_addr;
s->daddr = p->cf->remote_ip;
s->saddr = p->local_ip;
s->daddr = p->remote_ip;
s->dport = p->cf->remote_port;
s->iface = p->neigh ? p->neigh->iface : NULL;
s->vrf = p->p.vrf;
@ -1075,6 +1129,9 @@ err:
return;
}
/* An instance counts as dynamic when its remote_ip is zero (as tested by ipa_zero()) */
static inline int
bgp_is_dynamic(struct bgp_proto *p)
{
  return ipa_zero(p->remote_ip);
}
/**
* bgp_find_proto - find existing proto for incoming connection
* @sk: TCP socket
@ -1083,6 +1140,7 @@ err:
static struct bgp_proto *
bgp_find_proto(sock *sk)
{
struct bgp_proto *best = NULL;
struct bgp_proto *p;
/* sk->iface is valid only if src or dst address is link-local */
@ -1090,13 +1148,20 @@ bgp_find_proto(sock *sk)
WALK_LIST(p, proto_list)
if ((p->p.proto == &proto_bgp) &&
(p->sock == sk->data) &&
ipa_equal(p->cf->remote_ip, sk->daddr) &&
(ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
(!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
(p->p.vrf == sk->vrf) &&
(p->cf->local_port == sk->sport) &&
(!link || (p->cf->iface == sk->iface)) &&
(ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)))
return p;
{
best = p;
return NULL;
if (!bgp_is_dynamic(p))
break;
}
return best;
}
/**
@ -1175,6 +1240,16 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
sk_reallocate(sk);
}
/* For dynamic BGP, spawn new instance and postpone the socket */
if (bgp_is_dynamic(p))
{
p = bgp_spawn(p, sk->daddr);
p->postponed_sk = sk;
rmove(sk, p->p.pool);
return 0;
}
rmove(sk, p->p.pool);
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, sk);
bgp_send_open(&p->incoming_conn);
@ -1201,11 +1276,11 @@ bgp_start_neighbor(struct bgp_proto *p)
{
/* Called only for single-hop BGP sessions */
if (ipa_zero(p->source_addr))
p->source_addr = p->neigh->ifa->ip;
if (ipa_zero(p->local_ip))
p->local_ip = p->neigh->ifa->ip;
if (ipa_is_link_local(p->source_addr))
p->link_addr = p->source_addr;
if (ipa_is_link_local(p->local_ip))
p->link_addr = p->local_ip;
else if (p->neigh->iface->llv6)
p->link_addr = p->neigh->iface->llv6->ip;
@ -1293,8 +1368,8 @@ bgp_bfd_notify(struct bfd_request *req)
static void
bgp_update_bfd(struct bgp_proto *p, int use_bfd)
{
if (use_bfd && !p->bfd_req)
p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
if (use_bfd && !p->bfd_req && !bgp_is_dynamic(p))
p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
p->cf->multihop ? NULL : p->neigh->iface,
bgp_bfd_notify, p);
@ -1375,7 +1450,7 @@ static void
bgp_start_locked(struct object_lock *lock)
{
struct bgp_proto *p = lock->data;
struct bgp_config *cf = p->cf;
const struct bgp_config *cf = p->cf;
if (p->p.proto_state != PS_START)
{
@ -1385,17 +1460,17 @@ bgp_start_locked(struct object_lock *lock)
DBG("BGP: Got lock\n");
if (cf->multihop)
if (cf->multihop || bgp_is_dynamic(p))
{
/* Multi-hop sessions do not use neighbor entries */
bgp_initiate(p);
return;
}
neighbor *n = neigh_find(&p->p, cf->remote_ip, cf->iface, NEF_STICKY);
neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
if (!n)
{
log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
/* As we do not start yet, we can just disable protocol */
p->p.disabled = 1;
bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
@ -1406,7 +1481,7 @@ bgp_start_locked(struct object_lock *lock)
p->neigh = n;
if (n->scope <= 0)
BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface);
else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
else
@ -1417,14 +1492,29 @@ static int
bgp_start(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
struct object_lock *lock;
const struct bgp_config *cf = p->cf;
p->local_ip = cf->local_ip;
p->local_as = cf->local_as;
p->remote_as = cf->remote_as;
p->public_as = cf->local_as;
/* For dynamic BGP childs, remote_ip is already set */
if (ipa_nonzero(cf->remote_ip))
p->remote_ip = cf->remote_ip;
/* Confederation ID is used for truly external peers */
if (p->cf->confederation && !p->is_interior)
p->public_as = cf->confederation;
p->passive = cf->passive || bgp_is_dynamic(p);
DBG("BGP: Startup.\n");
p->start_state = BSS_PREPARE;
p->outgoing_conn.state = BS_IDLE;
p->incoming_conn.state = BS_IDLE;
p->neigh = NULL;
p->bfd_req = NULL;
p->postponed_sk = NULL;
p->gr_ready = 0;
p->gr_active_num = 0;
@ -1437,7 +1527,6 @@ bgp_start(struct proto *P)
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
p->remote_id = 0;
p->source_addr = p->cf->local_ip;
p->link_addr = IPA_NONE;
/* Lock all channels when in GR recovery mode */
@ -1452,9 +1541,9 @@ bgp_start(struct proto *P)
* Before attempting to create the connection, we need to lock the port,
* so that we are the only instance attempting to talk with that neighbor.
*/
struct object_lock *lock;
lock = p->lock = olock_new(P->pool);
lock->addr = p->cf->remote_ip;
lock->addr = p->remote_ip;
lock->port = p->cf->remote_port;
lock->iface = p->cf->iface;
lock->vrf = p->cf->iface ? NULL : p->p.vrf;
@ -1472,7 +1561,7 @@ static int
bgp_shutdown(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
uint subcode = 0;
int subcode = 0;
char *message = NULL;
byte *data = NULL;
@ -1493,6 +1582,7 @@ bgp_shutdown(struct proto *P)
case PDC_CMD_DISABLE:
case PDC_CMD_SHUTDOWN:
shutdown:
subcode = 2; // Errcode 6, 2 - administrative shutdown
message = P->message;
break;
@ -1502,6 +1592,14 @@ bgp_shutdown(struct proto *P)
message = P->message;
break;
case PDC_CMD_GR_DOWN:
if ((p->cf->gr_mode != BGP_GR_ABLE) &&
(p->cf->llgr_mode != BGP_LLGR_ABLE))
goto shutdown;
subcode = -1; // Do not send NOTIFICATION, just close the connection
break;
case PDC_RX_LIMIT_HIT:
case PDC_IN_LIMIT_HIT:
subcode = 1; // Errcode 6, 1 - max number of prefixes reached
@ -1528,7 +1626,7 @@ bgp_shutdown(struct proto *P)
if (message)
{
uint msg_len = strlen(message);
msg_len = MIN(msg_len, 128);
msg_len = MIN(msg_len, 255);
/* Buffer will be freed automatically by protocol shutdown */
data = mb_alloc(p->p.pool, msg_len + 1);
@ -1562,17 +1660,21 @@ bgp_init(struct proto_config *CF)
P->rte_modify = bgp_rte_modify_stale;
p->cf = cf;
p->local_as = cf->local_as;
p->remote_as = cf->remote_as;
p->public_as = cf->local_as;
p->is_internal = (cf->local_as == cf->remote_as);
p->is_interior = p->is_internal || cf->confederation_member;
p->rs_client = cf->rs_client;
p->rr_client = cf->rr_client;
/* Confederation ID is used for truly external peers */
if (cf->confederation && !p->is_interior)
p->public_as = cf->confederation;
p->ipv4 = ipa_nonzero(cf->remote_ip) ?
ipa_is_ip4(cf->remote_ip) :
(cf->remote_range && (cf->remote_range->type == NET_IP4));
p->remote_ip = cf->remote_ip;
p->remote_as = cf->remote_as;
/* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
if (cf->c.parent)
cf->remote_ip = IPA_NONE;
/* Add all channels */
struct bgp_channel_config *cc;
@ -1604,7 +1706,7 @@ bgp_channel_start(struct channel *C)
{
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
ip_addr src = p->source_addr;
ip_addr src = p->local_ip;
if (c->igp_table_ip4)
rt_lock_table(c->igp_table_ip4);
@ -1745,14 +1847,19 @@ void
bgp_postconfig(struct proto_config *CF)
{
struct bgp_config *cf = (void *) CF;
int internal = (cf->local_as == cf->remote_as);
int interior = internal || cf->confederation_member;
/* Do not check templates at all */
if (cf->c.class == SYM_TEMPLATE)
return;
/* Handle undefined remote_as, zero should mean unspecified external */
if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL))
cf->remote_as = cf->local_as;
int internal = (cf->local_as == cf->remote_as);
int interior = internal || cf->confederation_member;
/* EBGP direct by default, IBGP multihop by default */
if (cf->multihop < 0)
cf->multihop = internal ? 64 : 0;
@ -1769,11 +1876,20 @@ bgp_postconfig(struct proto_config *CF)
if (!cf->local_as)
cf_error("Local AS number must be set");
if (ipa_zero(cf->remote_ip))
if (ipa_zero(cf->remote_ip) && !cf->remote_range)
cf_error("Neighbor must be configured");
if (!cf->remote_as)
cf_error("Remote AS number must be set");
if (ipa_zero(cf->local_ip) && cf->strict_bind)
cf_error("Local address must be configured for strict bind");
if (!cf->remote_as && !cf->peer_type)
cf_error("Remote AS number (or peer type) must be set");
if ((cf->peer_type == BGP_PT_INTERNAL) && !internal)
cf_error("IBGP cannot have different ASNs");
if ((cf->peer_type == BGP_PT_EXTERNAL) && internal)
cf_error("EBGP cannot have the same ASNs");
if (!cf->iface && (ipa_is_link_local(cf->local_ip) ||
ipa_is_link_local(cf->remote_ip)))
@ -1885,8 +2001,8 @@ static int
bgp_reconfigure(struct proto *P, struct proto_config *CF)
{
struct bgp_proto *p = (void *) P;
struct bgp_config *new = (void *) CF;
struct bgp_config *old = p->cf;
const struct bgp_config *new = (void *) CF;
const struct bgp_config *old = p->cf;
if (proto_get_router_id(CF) != p->local_id)
return 0;
@ -1896,7 +2012,12 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF)
// password item is last and must be checked separately
OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
&& ((!old->password && !new->password)
|| (old->password && new->password && !strcmp(old->password, new->password)));
|| (old->password && new->password && !strcmp(old->password, new->password)))
&& ((!old->remote_range && !new->remote_range)
|| (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range)))
&& ((!old->dynamic_name && !new->dynamic_name)
|| (old->dynamic_name && new->dynamic_name && !strcmp(old->dynamic_name, new->dynamic_name)))
&& (old->dynamic_name_digits == new->dynamic_name_digits);
/* FIXME: Move channel reconfiguration to generic protocol code ? */
struct channel *C, *C2;
@ -1926,6 +2047,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF)
if (same)
p->cf = new;
/* Reset name counter */
p->dynamic_name_counter = 0;
return same;
}
@ -2056,7 +2180,7 @@ bgp_state_dsc(struct bgp_proto *p)
return "Down";
int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive)
return "Passive";
return bgp_state_names[state];
@ -2232,8 +2356,14 @@ bgp_show_proto_info(struct proto *P)
struct bgp_proto *p = (struct bgp_proto *) P;
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
if (bgp_is_dynamic(p) && p->cf->remote_range)
cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range);
else
cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface);
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
cli_msg(-1006, " Local AS: %u", p->cf->local_as);
if (p->gr_active_num)
cli_msg(-1006, " Neighbor graceful restart active");
@ -2269,7 +2399,7 @@ bgp_show_proto_info(struct proto *P)
p->rr_client ? " route-reflector" : "",
p->rs_client ? " route-server" : "",
p->as4_session ? " AS4" : "");
cli_msg(-1006, " Source address: %I", p->source_addr);
cli_msg(-1006, " Source address: %I", p->local_ip);
cli_msg(-1006, " Hold timer: %t/%u",
tm_remains(p->conn->hold_timer), p->conn->hold_time);
cli_msg(-1006, " Keepalive timer: %t/%u",

View file

@ -83,6 +83,7 @@ struct bgp_config {
struct iface *iface; /* Interface for link-local addresses */
u16 local_port; /* Local listening port */
u16 remote_port; /* Neighbor destination port */
int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */
int multihop; /* Number of hops if multihop */
int strict_bind; /* Bind listening socket to local address */
int ttl_security; /* Enable TTL security [RFC 5082] */
@ -123,6 +124,9 @@ struct bgp_config {
u32 disable_after_cease; /* Disable it when cease is received, bitfield */
char *password; /* Password used for MD5 authentication */
net_addr *remote_range; /* Allowed neighbor range for dynamic BGP */
char *dynamic_name; /* Name pattern for dynamic BGP */
int dynamic_name_digits; /* Minimum number of digits for dynamic names */
int check_link; /* Use iface link state for liveness detection */
int bfd; /* Use BFD for liveness detection */
};
@ -136,6 +140,7 @@ struct bgp_channel_config {
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
u8 next_hop_self; /* Always set next hop to local IP address (NH_*) */
u8 next_hop_keep; /* Do not modify next hop attribute (NH_*) */
u8 mandatory; /* Channel is mandatory in capability negotiation */
u8 missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */
u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
@ -151,6 +156,9 @@ struct bgp_channel_config {
struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
};
#define BGP_PT_INTERNAL 1
#define BGP_PT_EXTERNAL 2
#define NH_NO 0
#define NH_ALL 1
#define NH_IBGP 2
@ -213,8 +221,11 @@ struct bgp_caps {
u16 gr_time; /* Graceful restart time in seconds */
u8 llgr_aware; /* Long-lived GR capability, RFC draft */
u8 any_ext_next_hop; /* Bitwise OR of per-AF ext_next_hop */
u8 any_add_path; /* Bitwise OR of per-AF add_path */
u16 af_count; /* Number of af_data items */
u16 length; /* Length of capabilities in OPEN msg */
struct bgp_af_caps af_data[0]; /* Per-AF capability data */
};
@ -235,6 +246,7 @@ struct bgp_conn {
u8 state; /* State of connection state machine */
u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
u8 ext_messages; /* Session uses extended message length */
u32 received_as; /* ASN received in OPEN message */
struct bgp_caps *local_caps;
struct bgp_caps *remote_caps;
@ -254,18 +266,21 @@ struct bgp_conn {
struct bgp_proto {
struct proto p;
struct bgp_config *cf; /* Shortcut to BGP configuration */
const struct bgp_config *cf; /* Shortcut to BGP configuration */
ip_addr local_ip, remote_ip;
u32 local_as, remote_as;
u32 public_as; /* Externally visible ASN (local_as or confederation id) */
u32 local_id; /* BGP identifier of this router */
u32 remote_id; /* BGP identifier of the neighbor */
u32 rr_cluster_id; /* Route reflector cluster ID */
int start_state; /* Substates that partitions BS_START */
u8 start_state; /* Substates that partitions BS_START */
u8 is_internal; /* Internal BGP session (local_as == remote_as) */
u8 is_interior; /* Internal or intra-confederation BGP session */
u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
u8 rr_client; /* Whether neighbor is RR client of me */
u8 rs_client; /* Whether neighbor is RS client of me */
u8 ipv4; /* Use IPv4 connection, i.e. remote_ip is IPv4 */
u8 passive; /* Do not initiate outgoing connection */
u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */
u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */
u8 gr_ready; /* Neighbor could do graceful restart */
@ -282,11 +297,12 @@ struct bgp_proto {
struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
struct bgp_socket *sock; /* Shared listening socket */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
ip_addr source_addr; /* Local address used as an advertised next hop */
ip_addr link_addr; /* Link-local version of source_addr */
struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */
ip_addr link_addr; /* Link-local version of local_ip */
event *event; /* Event for respawning and shutting process */
timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
int dynamic_name_counter; /* Counter for dynamic BGP names */
uint startup_delay; /* Delay (in seconds) of protocol startup due to previous errors */
btime last_proto_error; /* Time of last error that leads to protocol stop */
u8 last_error_class; /* Error class of last error */
@ -472,7 +488,7 @@ void bgp_graceful_restart_done(struct bgp_channel *c);
void bgp_refresh_begin(struct bgp_channel *c);
void bgp_refresh_end(struct bgp_channel *c);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len);
void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len);
struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
@ -549,6 +565,7 @@ void bgp_get_route_info(struct rte *, byte *buf);
/* packets.c */
void bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new);
void bgp_prepare_capabilities(struct bgp_conn *conn);
const struct bgp_af_desc *bgp_get_af_desc(u32 afi);
const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi);
void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type);

View file

@ -29,7 +29,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP)
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL,
DYNAMIC, RANGE, NAME, DIGITS)
%type <i> bgp_nh
%type <i32> bgp_afi
@ -68,6 +69,7 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->llgr_mode = -1;
BGP_CFG->llgr_time = 3600;
BGP_CFG->setkey = 1;
BGP_CFG->dynamic_name = "dynbgp";
BGP_CFG->check_link = -1;
}
;
@ -82,6 +84,8 @@ bgp_nbr_opts:
/* empty */
| bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); }
| bgp_nbr_opts AS expr { BGP_CFG->remote_as = $3; }
| bgp_nbr_opts INTERNAL { BGP_CFG->peer_type = BGP_PT_INTERNAL; }
| bgp_nbr_opts EXTERNAL { BGP_CFG->peer_type = BGP_PT_EXTERNAL; }
;
bgp_cease_mask:
@ -118,11 +122,18 @@ bgp_proto:
}
| bgp_proto NEIGHBOR bgp_nbr_opts ';'
| bgp_proto NEIGHBOR ipa ipa_scope bgp_nbr_opts ';' {
if (ipa_nonzero(BGP_CFG->remote_ip))
if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range)
cf_error("Only one neighbor per BGP instance is allowed");
BGP_CFG->remote_ip = $3;
if ($4) BGP_CFG->iface = $4;
}
| bgp_proto NEIGHBOR RANGE net_ip bgp_nbr_opts ';' {
if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range)
cf_error("Only one neighbor per BGP instance is allowed");
net_addr *n = cfg_alloc($4.length);
net_copy(n, &($4));
BGP_CFG->remote_range = n;
}
| bgp_proto INTERFACE TEXT ';' { BGP_CFG->iface = if_get_by_name($3); }
| bgp_proto RR CLUSTER ID idval ';' { BGP_CFG->rr_cluster_id = $5; }
| bgp_proto RR CLIENT bool ';' { BGP_CFG->rr_client = $4; }
@ -134,6 +145,12 @@ bgp_proto:
| bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; }
| bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; }
| bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); }
| bgp_proto DYNAMIC NAME text ';' {
if (strchr($4, '%')) cf_error("Forbidden character '%%' in dynamic name");
if (strlen($4) > (SYM_MAX_LEN - 16)) cf_error("Dynamic name too long");
BGP_CFG->dynamic_name = $4;
}
| bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); }
| bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; }
| bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; }
| bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; }
@ -223,6 +240,7 @@ bgp_channel_item:
| NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; }
| NEXT HOP SELF bgp_nh { BGP_CC->next_hop_self = $4; }
| NEXT HOP KEEP bgp_nh { BGP_CC->next_hop_keep = $4; }
| MANDATORY bool { BGP_CC->mandatory = $2; }
| MISSING LLADDR SELF { BGP_CC->missing_lladdr = MLL_SELF; }
| MISSING LLADDR DROP { BGP_CC->missing_lladdr = MLL_DROP; }
| MISSING LLADDR IGNORE { BGP_CC->missing_lladdr = MLL_IGNORE; }

View file

@ -100,7 +100,7 @@ init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
d->peer_as = p->remote_as;
d->local_as = p->local_as;
d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
d->af = ipa_is_ip4(p->cf->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
d->af = ipa_is_ip4(p->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
d->as4 = p_ok ? p->as4_session : 0;
@ -208,19 +208,22 @@ bgp_af_caps_cmp(const void *X, const void *Y)
}
static byte *
bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
void
bgp_prepare_capabilities(struct bgp_conn *conn)
{
struct bgp_proto *p = conn->bgp;
struct bgp_channel *c;
struct bgp_caps *caps;
struct bgp_af_caps *ac;
uint any_ext_next_hop = 0;
uint any_add_path = 0;
byte *data;
if (!p->cf->capabilities)
{
/* Just prepare empty local_caps */
conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
return;
}
/* Prepare bgp_caps structure */
int n = list_length(&p->p.channels);
caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
conn->local_caps = caps;
@ -251,10 +254,10 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
ac->ready = 1;
ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
any_ext_next_hop |= ac->ext_next_hop;
caps->any_ext_next_hop |= ac->ext_next_hop;
ac->add_path = c->cf->add_path;
any_add_path |= ac->add_path;
caps->any_add_path |= ac->add_path;
if (c->cf->gr_able)
{
@ -276,7 +279,16 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
/* Sort capability fields by AFI/SAFI */
qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
}
static byte *
bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
struct bgp_caps *caps = conn->local_caps;
struct bgp_af_caps *ac;
byte *buf_head = buf;
byte *data;
/* Create capability list in buffer */
@ -301,7 +313,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
*buf++ = 0; /* Capability data length */
}
if (any_ext_next_hop)
if (caps->any_ext_next_hop)
{
*buf++ = 5; /* Capability 5: Support for extended next hop */
*buf++ = 0; /* Capability data length, will be fixed later */
@ -353,7 +365,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
buf += 4;
}
if (any_add_path)
if (caps->any_add_path)
{
*buf++ = 69; /* Capability 69: Support for ADD-PATH */
*buf++ = 0; /* Capability data length, will be fixed later */
@ -394,6 +406,8 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
data[-1] = buf - data;
}
caps->length = buf - buf_head;
return buf;
}
@ -405,6 +419,8 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i
int i, cl;
u32 af;
caps->length += len;
while (len > 0)
{
if (len < 2 || len < (2 + pos[1]))
@ -568,6 +584,42 @@ err:
return;
}
/*
 * Decide whether the capabilities exchanged on @conn allow the session to
 * proceed. Returns 1 when every channel marked mandatory would be active and
 * at least one channel is active overall, 0 otherwise.
 *
 * This partially overlaps with bgp_conn_enter_established_state(), but it has
 * to run right after the OPEN message is received.
 */
static int
bgp_check_capabilities(struct bgp_conn *conn)
{
  struct bgp_proto *proto = conn->bgp;
  struct bgp_caps *ours = conn->local_caps;
  struct bgp_caps *theirs = conn->remote_caps;
  struct bgp_channel *ch;
  int n_active = 0;

  WALK_LIST(ch, proto->p.channels)
  {
    const struct bgp_af_caps *lc = bgp_find_af_caps(ours, ch->afi);
    const struct bgp_af_caps *rc = bgp_find_af_caps(theirs, ch->afi);
    int active = 0;

    /*
     * The channel will be active when we announced it as ready and either the
     * peer did so too, or the peer sent no capabilities at all and the channel
     * is plain IPv4.
     */
    if (lc && lc->ready)
    {
      if (rc && rc->ready)
	active = 1;
      else if (!theirs->length && (ch->afi == BGP_AF_IPV4))
	active = 1;
    }

    if (active)
    {
      n_active++;
      continue;
    }

    /* An inactive mandatory channel fails the whole check */
    if (ch->cf->mandatory)
      return 0;
  }

  /* At least one channel has to be active */
  return n_active ? 1 : 0;
}
static int
bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
{
@ -635,9 +687,6 @@ bgp_create_open(struct bgp_conn *conn, byte *buf)
}
else
{
/* Prepare empty local_caps */
conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
buf[9] = 0; /* No optional parameters */
return buf + 10;
}
@ -678,6 +727,10 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
if (!id || (p->is_internal && id == p->local_id))
{ bgp_error(conn, 2, 3, pkt+24, -4); return; }
/* RFC 5492 4 - check for required capabilities */
if (p->cf->capabilities && !bgp_check_capabilities(conn))
{ bgp_error(conn, 2, 7, NULL, 0); return; }
struct bgp_caps *caps = conn->remote_caps;
if (caps->as4_support)
@ -687,13 +740,18 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
if ((as4 != asn) && (asn != AS_TRANS))
log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
if (as4 != p->remote_as)
/* When remote ASN is unspecified, it must be external one */
if (p->remote_as ? (as4 != p->remote_as) : (as4 == p->local_as))
{ as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
conn->received_as = as4;
}
else
{
if (asn != p->remote_as)
if (p->remote_as ? (asn != p->remote_as) : (asn == p->local_as))
{ bgp_error(conn, 2, 2, pkt+20, 2); return; }
conn->received_as = asn;
}
/* Check the other connection */
@ -962,7 +1020,7 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
WITHDRAW(NO_NEXT_HOP);
ip_addr *nh = (void *) a->u.ptr->data;
ip_addr peer = s->proto->cf->remote_ip;
ip_addr peer = s->proto->remote_ip;
uint len = a->u.ptr->length;
/* Forbid zero next hop */
@ -2280,7 +2338,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
a->source = RTS_BGP;
a->scope = SCOPE_UNIVERSE;
a->from = s->proto->cf->remote_ip;
a->from = s->proto->remote_ip;
a->eattrs = ea;
c->desc->decode_next_hop(s, nh, nh_len, a);
@ -2634,6 +2692,12 @@ bgp_fire_tx(struct bgp_conn *conn)
end = bgp_create_notification(conn, pkt);
return bgp_send(conn, PKT_NOTIFICATION, end - buf);
}
else if (s & (1 << PKT_OPEN))
{
conn->packets_to_send &= ~(1 << PKT_OPEN);
end = bgp_create_open(conn, pkt);
return bgp_send(conn, PKT_OPEN, end - buf);
}
else if (s & (1 << PKT_KEEPALIVE))
{
conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
@ -2641,12 +2705,6 @@ bgp_fire_tx(struct bgp_conn *conn)
bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
}
else if (s & (1 << PKT_OPEN))
{
conn->packets_to_send &= ~(1 << PKT_OPEN);
end = bgp_create_open(conn, pkt);
return bgp_send(conn, PKT_OPEN, end - buf);
}
else while (conn->channels_to_send)
{
c = bgp_get_channel_to_send(p, conn);
@ -2731,15 +2789,18 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
ev_schedule(conn->tx_ev);
}
void
bgp_kick_tx(void *vconn)
{
struct bgp_conn *conn = vconn;
DBG("BGP: kicking TX\n");
while (bgp_fire_tx(conn) > 0)
uint max = 1024;
while (--max && (bgp_fire_tx(conn) > 0))
;
if (!max && !ev_active(conn->tx_ev))
ev_schedule(conn->tx_ev);
}
void
@ -2748,8 +2809,12 @@ bgp_tx(sock *sk)
struct bgp_conn *conn = sk->data;
DBG("BGP: TX hook\n");
while (bgp_fire_tx(conn) > 0)
uint max = 1024;
while (--max && (bgp_fire_tx(conn) > 0))
;
if (!max && !ev_active(conn->tx_ev))
ev_schedule(conn->tx_ev);
}
@ -2835,7 +2900,7 @@ bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
return 1;
/* Handle proper message */
if ((msg_len > 128) && (msg_len + 1 > len))
if ((msg_len > 255) && (msg_len + 1 > len))
return 0;
/* Some elementary cleanup */

View file

@ -361,7 +361,7 @@ mrt_peer_table_dump(struct mrt_table_dump_state *s)
if ((P->proto == &proto_bgp) && (P->proto_state != PS_DOWN))
{
struct bgp_proto *p = (void *) P;
mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->cf->remote_ip);
mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->remote_ip);
}
#endif
@ -429,7 +429,7 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r)
{
struct bgp_proto *p = (void *) r->attrs->src->proto;
struct mrt_peer_entry *n =
HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->cf->remote_ip);
HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip);
peer = n ? n->index : 0;
}

View file

@ -200,6 +200,7 @@ CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL)
CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY)
CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY, LENGTH)
CF_KEYWORDS(MERGE, LSA, SUPPRESSION, MULTICAST, RFC5838, VPN, PE)
CF_KEYWORDS(GRACEFUL, RESTART, AWARE, TIME)
%type <ld> lsadb_args
%type <i> ospf_variant ospf_af_mc nbma_eligible
@ -226,6 +227,8 @@ ospf_proto_start: proto_start ospf_variant
OSPF_CFG->tick = OSPF_DEFAULT_TICK;
OSPF_CFG->ospf2 = $2;
OSPF_CFG->af_ext = !$2;
OSPF_CFG->gr_mode = OSPF_GR_AWARE;
OSPF_CFG->gr_time = OSPF_DEFAULT_GR_TIME;
};
ospf_proto:
@ -258,6 +261,9 @@ ospf_proto_item:
| RFC5838 bool { OSPF_CFG->af_ext = $2; if (!ospf_cfg_is_v3()) cf_error("RFC5838 option requires OSPFv3"); }
| VPN PE bool { OSPF_CFG->vpn_pe = $3; }
| STUB ROUTER bool { OSPF_CFG->stub_router = $3; }
| GRACEFUL RESTART bool { OSPF_CFG->gr_mode = $3; }
| GRACEFUL RESTART AWARE { OSPF_CFG->gr_mode = OSPF_GR_AWARE; }
| GRACEFUL RESTART TIME expr { OSPF_CFG->gr_time = $4; if (($4 < 1) || ($4 > 1800)) cf_error("Graceful restart time must be in range 1-1800"); }
| ECMP bool { OSPF_CFG->ecmp = $2 ? OSPF_DEFAULT_ECMP_LIMIT : 0; }
| ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? $4 : 0; }
| MERGE EXTERNAL bool { OSPF_CFG->merge_external = $3; }

View file

@ -215,7 +215,7 @@ ospf_send_dbdes(struct ospf_proto *p, struct ospf_neighbor *n)
ASSERT((n->state == NEIGHBOR_EXSTART) || (n->state == NEIGHBOR_EXCHANGE));
if (n->ifa->oa->rt == NULL)
if (!n->ifa->oa->rt && !p->gr_recovery)
return;
ospf_prepare_dbdes(p, n);
@ -279,6 +279,10 @@ ospf_process_dbdes(struct ospf_proto *p, struct ospf_packet *pkt, struct ospf_ne
if (LSA_SCOPE(lsa_type) == LSA_SCOPE_RES)
DROP1("LSA with invalid scope");
/* RFC 3623 2.2 (2) special case - check for my router-LSA (GR recovery) */
if ((lsa_type == LSA_T_RT) && (lsa.rt == p->router_id))
n->got_my_rt_lsa = 1;
en = ospf_hash_find(p->gr, lsa_domain, lsa.id, lsa.rt, lsa_type);
if (!en || (lsa_comp(&lsa, &(en->lsa)) == CMP_NEWER))
{

View file

@ -772,6 +772,14 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
ifa->cf = new;
ifa->marked = 0;
/* Cancel GR peers if GR is disabled */
if (!p->gr_mode && p->gr_count)
{
struct ospf_neighbor *n, *nx;
WALK_LIST_DELSAFE(n, nx, ifa->neigh_list)
if (n->gr_active)
ospf_neigh_cancel_graceful_restart(n);
}
/* HELLO TIMER */
if (ifa->helloint != new->helloint)

View file

@ -12,6 +12,9 @@
#include "lib/fletcher16.h"
#define HDRLEN sizeof(struct ospf_lsa_header)
#ifndef CPU_BIG_ENDIAN
void
lsa_hton_hdr(struct ospf_lsa_header *h, struct ospf_lsa_header *n)
@ -61,7 +64,6 @@ lsa_ntoh_body(void *n, void *h, u16 len)
#endif /* little endian */
int
lsa_flooding_allowed(u32 type, u32 domain, struct ospf_iface *ifa)
{
@ -147,11 +149,13 @@ static const u16 lsa_v2_types[] = {
/* Maps OSPFv2 opaque types to OSPFv3 function codes */
static const u16 opaque_lsa_types[] = {
[LSA_OT_GR] = LSA_T_GR,
[LSA_OT_RI] = LSA_T_RI_,
};
/* Maps (subset of) OSPFv3 function codes to OSPFv2 opaque types */
static const u8 opaque_lsa_types_inv[] = {
[LSA_T_GR] = LSA_OT_GR,
[LSA_T_RI_] = LSA_OT_RI,
};
@ -168,7 +172,13 @@ lsa_get_type_domain_(u32 type, u32 id, struct ospf_iface *ifa, u32 *otype, u32 *
uint code;
if (LSA_FUNCTION(type) == LSA_T_OPAQUE_)
if (code = LOOKUP(opaque_lsa_types, id >> 24))
{
type = code | LSA_UBIT | LSA_SCOPE(type);
/* Hack for Grace-LSA: It does not use U-bit for link-scoped LSAs */
if (type == (LSA_T_GR | LSA_UBIT))
type = LSA_T_GR;
}
}
else
{
@ -196,6 +206,13 @@ lsa_get_type_domain_(u32 type, u32 id, struct ospf_iface *ifa, u32 *otype, u32 *
}
}
/*
 * Return nonzero when @type denotes an opaque LSA: either its function code is
 * the generic opaque code (LSA_T_OPAQUE_), or it has a nonzero entry in the
 * opaque_lsa_types_inv mapping table.
 */
int
lsa_is_opaque(u32 type)
{
  const u32 code = LSA_FUNCTION(type);

  if (LOOKUP(opaque_lsa_types_inv, code))
    return 1;

  return code == LSA_T_OPAQUE_;
}
u32
lsa_get_opaque_type(u32 type)
{
@ -267,6 +284,51 @@ lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2)
}
/*
 * Size in bytes that one TLV occupies in an LSA body: the TLV header plus the
 * value length passed through BIRD_ALIGN(…, 4) (presumably rounding it up to
 * a multiple of four -- confirm against the BIRD_ALIGN definition).
 */
#define LSA_TLV_LENGTH(tlv) \
(sizeof(struct ospf_tlv) + BIRD_ALIGN((tlv)->length, 4))
/* Address of the TLV that immediately follows @tlv */
#define LSA_NEXT_TLV(tlv) \
((struct ospf_tlv *) ((byte *) (tlv) + LSA_TLV_LENGTH(tlv)))
/*
 * Iterate over TLVs stored in @len bytes starting at @buf. Declares the loop
 * variable @tlv itself. Only the start of each TLV is compared against the end
 * of the buffer; the TLV header and value are not bounds-checked here.
 */
#define LSA_WALK_TLVS(tlv,buf,len) \
for(struct ospf_tlv *tlv = (void *) (buf); \
(byte *) tlv < (byte *) (buf) + (len); \
tlv = LSA_NEXT_TLV(tlv))
/*
 * Find the first TLV of the given @type in the body of LSA @en.
 * Returns a pointer into the LSA body, or NULL when no such TLV exists.
 * The walk itself does not bounds-check individual TLVs.
 */
struct ospf_tlv *
lsa_get_tlv(struct top_hash_entry *en, uint type)
{
  struct ospf_tlv *cur = (void *) (en->lsa_body);

  while ((byte *) cur < (byte *) (en->lsa_body) + (en->lsa.length - HDRLEN))
  {
    if (cur->type == type)
      return cur;

    cur = LSA_NEXT_TLV(cur);
  }

  return NULL;
}
/*
 * Check that @len bytes at @buf form a well-formed sequence of TLVs.
 *
 * Every TLV header must fit into the remaining data, and every TLV (header
 * plus padded value, see LSA_TLV_LENGTH) must end within the buffer. An empty
 * buffer is valid.
 *
 * Returns 1 when the sequence is valid, 0 otherwise.
 */
int
lsa_validate_tlvs(byte *buf, uint len)
{
  byte *pos = buf;
  uint left = len;	/* Bytes not yet consumed */

  while (left > 0)
  {
    /*
     * Compare lengths instead of pointers: forming (pos + size) beyond the end
     * of the buffer is undefined behavior and may wrap around.
     */
    if (left < sizeof(struct ospf_tlv))
      return 0;

    struct ospf_tlv *tlv = (void *) pos;
    uint tlv_len = LSA_TLV_LENGTH(tlv);

    if (tlv_len > left)
      return 0;

    pos += tlv_len;
    left -= tlv_len;
  }

  return 1;
}
static inline int
lsa_walk_rt2(struct ospf_lsa_rt_walk *rt)
{
@ -408,7 +470,6 @@ lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_
}
}
#define HDRLEN sizeof(struct ospf_lsa_header)
static int
lsa_validate_rt2(struct ospf_lsa_header *lsa, struct ospf_lsa_rt *body)
@ -603,6 +664,12 @@ lsa_validate_prefix(struct ospf_lsa_header *lsa, struct ospf_lsa_prefix *body)
return lsa_validate_pxlist(lsa, body->pxcount, sizeof(struct ospf_lsa_prefix), (u8 *) body);
}
static int
lsa_validate_gr(struct ospf_lsa_header *lsa, void *body)
{
return lsa_validate_tlvs(body, lsa->length - HDRLEN);
}
static int
lsa_validate_ri(struct ospf_lsa_header *lsa UNUSED, struct ospf_lsa_net *body UNUSED)
{
@ -643,6 +710,8 @@ lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body)
case LSA_T_EXT:
case LSA_T_NSSA:
return lsa_validate_ext2(lsa, body);
case LSA_T_GR:
return lsa_validate_gr(lsa, body);
case LSA_T_RI_LINK:
case LSA_T_RI_AREA:
case LSA_T_RI_AS:
@ -674,6 +743,8 @@ lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body)
return lsa_validate_link(lsa, body);
case LSA_T_PREFIX:
return lsa_validate_prefix(lsa, body);
case LSA_T_GR:
return lsa_validate_gr(lsa, body);
case LSA_T_RI_LINK:
case LSA_T_RI_AREA:
case LSA_T_RI_AS:

View file

@ -44,10 +44,7 @@ static inline void lsa_get_type_domain(struct ospf_lsa_header *lsa, struct ospf_
static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p)
{ return ospf_is_v2(p) ? (h->type_raw & LSA_T_V2_MASK) : h->type_raw; }
/* Assuming OSPFv2 - All U-bit LSAs are mapped to Opaque LSAs */
static inline int lsa_is_opaque(u32 type)
{ return !!(type & LSA_UBIT); }
int lsa_is_opaque(u32 type);
u32 lsa_get_opaque_type(u32 type);
int lsa_flooding_allowed(u32 type, u32 domain, struct ospf_iface *ifa);
int lsa_is_acceptable(u32 type, struct ospf_neighbor *n, struct ospf_proto *p);
@ -58,6 +55,16 @@ u16 lsa_verify_checksum(const void *lsa_n, int lsa_len);
#define CMP_SAME 0
#define CMP_OLDER -1
int lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2);
struct ospf_tlv * lsa_get_tlv(struct top_hash_entry *en, uint type);
/*
 * Read a 32-bit value from the TLV of the given @type in LSA @en.
 * Returns 0 when the TLV is missing or its length is not exactly 4.
 */
static inline u32
lsa_get_tlv_u32(struct top_hash_entry *en, uint type)
{
  struct ospf_tlv *found = lsa_get_tlv(en, type);

  if (!found)
    return 0;

  if (found->length != 4)
    return 0;

  return found->data[0];
}
void lsa_walk_rt_init(struct ospf_proto *po, struct top_hash_entry *act, struct ospf_lsa_rt_walk *rt);
int lsa_walk_rt(struct ospf_lsa_rt_walk *rt);
void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric);

View file

@ -185,6 +185,13 @@ static int ospf_flood_lsupd(struct ospf_proto *p, struct top_hash_entry **lsa_li
static void
ospf_enqueue_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_iface *ifa)
{
/* Exception for local Grace-LSA, they are flooded synchronously */
if ((en->lsa_type == LSA_T_GR) && (en->lsa.rt == p->router_id))
{
ospf_flood_lsupd(p, &en, 1, 1, ifa);
return;
}
if (ifa->flood_queue_used == ifa->flood_queue_size)
{
/* If we already have full queue, we send some packets */
@ -591,8 +598,9 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa,
}
/* 13. (5f) - handle self-originated LSAs, see also 13.4. */
if ((lsa.rt == p->router_id) ||
(ospf_is_v2(p) && (lsa_type == LSA_T_NET) && ospf_addr_is_local(p, ifa->oa, ipa_from_u32(lsa.id))))
if (!p->gr_recovery &&
((lsa.rt == p->router_id) ||
(ospf_is_v2(p) && (lsa_type == LSA_T_NET) && ospf_addr_is_local(p, ifa->oa, ipa_from_u32(lsa.id)))))
{
OSPF_TRACE(D_EVENTS, "Received unexpected self-originated LSA");
ospf_advance_lsa(p, en, &lsa, lsa_type, lsa_domain, body);
@ -629,6 +637,14 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa,
if (lsa_type == LSA_T_LINK)
ospf_notify_net_lsa(ifa);
/* RFC 3623 3.1 - entering graceful restart helper mode */
if (lsa_type == LSA_T_GR)
ospf_neigh_notify_grace_lsa(n, en);
/* Link received pre-restart router LSA */
if (p->gr_recovery && (lsa_type == LSA_T_RT) && (lsa.rt == p->router_id))
ifa->oa->rt = en;
/* 13. (5b) - flood new LSA */
int flood_back = ospf_flood_lsa(p, en, n);

View file

@ -28,6 +28,8 @@ static void dbdes_timer_hook(timer *t);
static void lsrq_timer_hook(timer *t);
static void lsrt_timer_hook(timer *t);
static void ackd_timer_hook(timer *t);
static void ospf_neigh_stop_graceful_restart_(struct ospf_neighbor *n);
static void graceful_restart_timeout(timer *t);
static void
@ -163,7 +165,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state)
if (old_state == NEIGHBOR_FULL)
ifa->fadj--;
if (ifa->fadj != old_fadj)
if ((ifa->fadj != old_fadj) && !n->gr_active)
{
/* RFC 2328 12.4 Event 4 - neighbor enters/leaves Full state */
ospf_notify_rt_lsa(ifa->oa);
@ -182,6 +184,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state)
n->dds++;
n->myimms = DBDES_IMMS;
n->got_my_rt_lsa = 0;
tm_start(n->dbdes_timer, 0);
tm_start(n->ackd_timer, ifa->rxmtint S / 2);
@ -191,9 +194,9 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state)
n->myimms &= ~DBDES_I;
/* Generate NeighborChange event if needed, see RFC 2328 9.2 */
if ((state == NEIGHBOR_2WAY) && (old_state < NEIGHBOR_2WAY))
if ((state == NEIGHBOR_2WAY) && (old_state < NEIGHBOR_2WAY) && !n->gr_active)
ospf_iface_sm(ifa, ISM_NEICH);
if ((state < NEIGHBOR_2WAY) && (old_state >= NEIGHBOR_2WAY))
if ((state < NEIGHBOR_2WAY) && (old_state >= NEIGHBOR_2WAY) && !n->gr_active)
ospf_iface_sm(ifa, ISM_NEICH);
}
@ -291,6 +294,17 @@ ospf_neigh_sm(struct ospf_neighbor *n, int event)
case INM_KILLNBR:
case INM_LLDOWN:
case INM_INACTTIM:
if (n->gr_active && (event == INM_INACTTIM))
{
/* Just down the neighbor, but do not remove it */
reset_lists(p, n);
ospf_neigh_chstate(n, NEIGHBOR_DOWN);
break;
}
if (n->gr_active)
ospf_neigh_stop_graceful_restart_(n);
/* No need for reset_lists() */
ospf_neigh_chstate(n, NEIGHBOR_DOWN);
ospf_neigh_down(n);
@ -356,6 +370,180 @@ can_do_adj(struct ospf_neighbor *n)
return i;
}
/*
 * Mark neighbor @n as undergoing graceful restart: set its gr_active flag,
 * bump the per-protocol counter of such neighbors and arm a fresh timer that
 * calls graceful_restart_timeout() after @gr_time seconds.
 */
static void
ospf_neigh_start_graceful_restart(struct ospf_neighbor *n, uint gr_time)
{
  /* Keep the name 'p' -- presumably referenced by OSPF_TRACE(); confirm */
  struct ospf_proto *p = n->ifa->oa->po;

  OSPF_TRACE(D_EVENTS, "Neighbor %R on %s started graceful restart",
	     n->rid, n->ifa->ifname);

  p->gr_count++;
  n->gr_active = 1;

  timer *t = tm_new_init(n->pool, graceful_restart_timeout, n, 0, 0);
  n->gr_timer = t;
  tm_start(t, gr_time S);
}
/*
 * Common part of leaving graceful-restart state for neighbor @n, without any
 * logging: clear the flag and counter, release the timer, then trigger the
 * notifications and the interface NeighborChange event.
 */
static void
ospf_neigh_stop_graceful_restart_(struct ospf_neighbor *n)
{
struct ospf_proto *p = n->ifa->oa->po;
struct ospf_iface *ifa = n->ifa;
n->gr_active = 0;
p->gr_count--;
/* The timer was allocated when graceful restart started; release it */
rfree(n->gr_timer);
n->gr_timer = NULL;
/* Notify router-LSA and network-LSA handling for the affected area/iface */
ospf_notify_rt_lsa(ifa->oa);
ospf_notify_net_lsa(ifa);
/* For virtual links, the area referenced by ifa->voa is notified as well */
if (ifa->type == OSPF_IT_VLINK)
ospf_notify_rt_lsa(ifa->voa);
/* Deliver the NeighborChange event to the interface state machine */
ospf_iface_sm(ifa, ISM_NEICH);
}
/*
 * Leave graceful-restart state for neighbor @n after it finished its restart:
 * log the event and run the common stop routine.
 */
static void
ospf_neigh_stop_graceful_restart(struct ospf_neighbor *n)
{
/* 'p' is not used directly here; presumably OSPF_TRACE() refers to it -- confirm */
struct ospf_proto *p = n->ifa->oa->po;
OSPF_TRACE(D_EVENTS, "Neighbor %R on %s finished graceful restart",
n->rid, n->ifa->ifname);
ospf_neigh_stop_graceful_restart_(n);
}
/*
 * Cancel graceful-restart state of neighbor @n: log the event, run the common
 * stop routine and, if the neighbor is already in Down state, pass it to
 * ospf_neigh_down(). Callers walking a neighbor list use the DELSAFE variant
 * around this call.
 */
void
ospf_neigh_cancel_graceful_restart(struct ospf_neighbor *n)
{
/* 'p' is not used directly here; presumably OSPF_TRACE() refers to it -- confirm */
struct ospf_proto *p = n->ifa->oa->po;
OSPF_TRACE(D_EVENTS, "Graceful restart canceled for nbr %R on %s",
n->rid, n->ifa->ifname);
ospf_neigh_stop_graceful_restart_(n);
/* A neighbor kept in Down state only because of GR is now handed to ospf_neigh_down() */
if (n->state == NEIGHBOR_DOWN)
ospf_neigh_down(n);
}
/*
 * Timer hook for the per-neighbor graceful-restart timer; @t->data is the
 * neighbor. Logs the expiry, runs the common stop routine (which also frees
 * this timer) and passes a neighbor that is in Down state to ospf_neigh_down().
 */
static void
graceful_restart_timeout(timer *t)
{
struct ospf_neighbor *n = t->data;
/* 'p' is not used directly here; presumably OSPF_TRACE() refers to it -- confirm */
struct ospf_proto *p = n->ifa->oa->po;
OSPF_TRACE(D_EVENTS, "Graceful restart timer expired for nbr %R on %s",
n->rid, n->ifa->ifname);
/* Note: this releases n->gr_timer, i.e. @t must not be touched afterwards */
ospf_neigh_stop_graceful_restart_(n);
if (n->state == NEIGHBOR_DOWN)
ospf_neigh_down(n);
}
static inline int
changes_in_lsrtl(struct ospf_neighbor *n)
{
/* This could be improved, see RFC 3623 3.1 (2) */
struct top_hash_entry *en;
WALK_SLIST(en, n->lsrtl)
if (LSA_FUNCTION(en->lsa_type) <= LSA_FUNCTION(LSA_T_NSSA))
return 1;
return 0;
}
/*
 * React to a Grace-LSA @en received from neighbor @n.
 *
 * First the neighbor the LSA refers to is located (it may differ from the
 * neighbor the LSA was received from). Then, for a non-flushed LSA, either the
 * running grace timer is updated or all the RFC 3623 3.1 conditions are
 * checked and graceful restart is started for that neighbor. A flushed LSA
 * (age at LSA_MAXAGE or more) stops an active graceful restart.
 */
void
ospf_neigh_notify_grace_lsa(struct ospf_neighbor *n, struct top_hash_entry *en)
{
struct ospf_iface *ifa = n->ifa;
struct ospf_proto *p = ifa->oa->po;
/* In OSPFv2, neighbors are identified by either IP or Router ID, based on network type */
uint t = ifa->type;
if (ospf_is_v2(p) && ((t == OSPF_IT_BCAST) || (t == OSPF_IT_NBMA) || (t == OSPF_IT_PTMP)))
{
/* Identify the neighbor by the address TLV; ignore the LSA if it is missing or malformed */
struct ospf_tlv *tlv = lsa_get_tlv(en, LSA_GR_ADDRESS);
if (!tlv || tlv->length != 4)
return;
ip_addr addr = ipa_from_u32(tlv->data[0]);
if (!ipa_equal(n->ip, addr))
n = find_neigh_by_ip(ifa, addr);
}
else
{
/* Identify the neighbor by the advertising router ID of the LSA */
if (n->rid != en->lsa.rt)
n = find_neigh(ifa, en->lsa.rt);
}
/* No matching neighbor on this interface */
if (!n)
return;
if (en->lsa.age < LSA_MAXAGE)
{
/* lsa_get_tlv_u32() yields 0 when the period TLV is missing or has a wrong length */
u32 period = lsa_get_tlv_u32(en, LSA_GR_PERIOD);
/* Exception for updating grace period */
if (n->gr_active)
{
/* NOTE(review): age is not compared with period here, so the interval may be
   zero or negative -- confirm tm_start() handles that as immediate expiry */
tm_start(n->gr_timer, (period S) - (en->lsa.age S));
return;
}
/* RFC 3623 3.1 (1) - full adjacency */
if (n->state != NEIGHBOR_FULL)
return;
/* RFC 3623 3.1 (2) - no changes in LSADB */
if (changes_in_lsrtl(n))
return;
/* RFC 3623 3.1 (3) - grace period not expired */
if (en->lsa.age >= period)
return;
/* RFC 3623 3.1 (4) - helper mode allowed */
if (!p->gr_mode)
return;
/* RFC 3623 3.1 (5) - no local graceful restart */
if (p->p.gr_recovery)
return;
/* The timer runs for the rest of the grace period (positive due to check (3)) */
ospf_neigh_start_graceful_restart(n, period - en->lsa.age);
}
else /* Grace-LSA is flushed */
{
if (n->gr_active)
ospf_neigh_stop_graceful_restart(n);
}
}
void
ospf_neigh_lsadb_changed_(struct ospf_proto *p, struct top_hash_entry *en)
{
struct ospf_iface *ifa;
struct ospf_neighbor *n, *nx;
if (LSA_FUNCTION(en->lsa_type) > LSA_FUNCTION(LSA_T_NSSA))
return;
/* RFC 3623 3.2 (3) - cancel graceful restart when LSdb changed */
WALK_LIST(ifa, p->iface_list)
if (lsa_flooding_allowed(en->lsa_type, en->domain, ifa))
WALK_LIST_DELSAFE(n, nx, ifa->neigh_list)
if (n->gr_active)
ospf_neigh_cancel_graceful_restart(n);
}
/* Neighbor identifier: the neighbor IP address (as u32) in OSPFv2, the router ID otherwise */
static inline u32
neigh_get_id(struct ospf_proto *p, struct ospf_neighbor *n)
{
  if (ospf_is_v2(p))
    return ipa_to_u32(n->ip);

  return n->rid;
}

View file

@ -92,7 +92,9 @@
* - RFC 2328 - main OSPFv2 standard
* - RFC 5340 - main OSPFv3 standard
* - RFC 3101 - OSPFv2 NSSA areas
* - RFC 3623 - OSPFv2 Graceful Restart
* - RFC 4576 - OSPFv2 VPN loop prevention
* - RFC 5187 - OSPFv3 Graceful Restart
* - RFC 5250 - OSPFv2 Opaque LSAs
* - RFC 5709 - OSPFv2 HMAC-SHA Cryptographic Authentication
* - RFC 5838 - OSPFv3 Support of Address Families
@ -207,7 +209,6 @@ ospf_area_remove(struct ospf_area *oa)
mb_free(oa);
}
struct ospf_area *
ospf_find_area(struct ospf_proto *p, u32 aid)
{
@ -228,6 +229,37 @@ ospf_find_vlink(struct ospf_proto *p, u32 voa, u32 vid)
return NULL;
}
static void
ospf_start_gr_recovery(struct ospf_proto *p)
{
OSPF_TRACE(D_EVENTS, "Graceful restart started");
p->gr_recovery = 1;
p->gr_timeout = current_time() + (p->gr_time S);
channel_graceful_restart_lock(p->p.main_channel);
p->p.main_channel->gr_wait = 1;
/* NOTE: We should get end of grace period from non-volatile storage */
}
/*
 * Leave graceful restart recovery: clear the recovery state, release the
 * graceful restart lock on the main channel and mark the LSA database via
 * ospf_mark_lsadb().
 */
void
ospf_stop_gr_recovery(struct ospf_proto *p)
{
  /* Reset the recovery state first */
  p->gr_timeout = 0;
  p->gr_recovery = 0;

  /*
   * NOTE: We should move channel_graceful_restart_unlock() to the end of
   * ospf_disp() in order to have local LSA reorigination / LSAdb cleanup /
   * routing table recomputation before official end of GR. It does not matter
   * when we are single-threaded.
   */
  channel_graceful_restart_unlock(p->p.main_channel);

  /* Reorigination of router/network LSAs is already scheduled */
  ospf_mark_lsadb(p);
}
static int
ospf_start(struct proto *P)
{
@ -246,6 +278,8 @@ ospf_start(struct proto *P)
p->asbr = c->asbr;
p->vpn_pe = c->vpn_pe;
p->ecmp = c->ecmp;
p->gr_mode = c->gr_mode;
p->gr_time = c->gr_time;
p->tick = c->tick;
p->disp_timer = tm_new_init(P->pool, ospf_disp, p, p->tick S, 0);
tm_start(p->disp_timer, 100 MS);
@ -267,6 +301,10 @@ ospf_start(struct proto *P)
p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 };
p->log_lsa_tbf = (struct tbf){ .rate = 4, .burst = 20 };
/* Lock the channel when in GR recovery mode */
if (p->p.gr_recovery && (p->gr_mode == OSPF_GR_ABLE))
ospf_start_gr_recovery(p);
WALK_LIST(ac, c->area_list)
ospf_area_add(p, ac);
@ -398,6 +436,9 @@ ospf_disp(timer * timer)
{
struct ospf_proto *p = timer->data;
if (p->gr_recovery)
ospf_update_gr_recovery(p);
/* Originate or flush local topology LSAs */
ospf_update_topology(p);
@ -475,9 +516,18 @@ ospf_shutdown(struct proto *P)
OSPF_TRACE(D_EVENTS, "Shutdown requested");
/* And send to all my neighbors 1WAY */
WALK_LIST(ifa, p->iface_list)
ospf_iface_shutdown(ifa);
if ((P->down_code == PDC_CMD_GR_DOWN) && (p->gr_mode == OSPF_GR_ABLE))
{
/* Originate Grace LSAs */
WALK_LIST(ifa, p->iface_list)
ospf_originate_gr_lsa(p, ifa);
}
else
{
/* Send to all my neighbors 1WAY */
WALK_LIST(ifa, p->iface_list)
ospf_iface_shutdown(ifa);
}
/* Cleanup locked rta entries */
FIB_WALK(&p->rtf, ort, nf)
@ -664,6 +714,8 @@ ospf_reconfigure(struct proto *P, struct proto_config *CF)
p->merge_external = new->merge_external;
p->asbr = new->asbr;
p->ecmp = new->ecmp;
p->gr_mode = new->gr_mode;
p->gr_time = new->gr_time;
p->tick = new->tick;
p->disp_timer->recurrent = p->tick S;
tm_start(p->disp_timer, 10 MS);

View file

@ -75,6 +75,7 @@
#define OSPF_DEFAULT_TICK 1
#define OSPF_DEFAULT_STUB_COST 1000
#define OSPF_DEFAULT_ECMP_LIMIT 16
#define OSPF_DEFAULT_GR_TIME 120
#define OSPF_DEFAULT_TRANSINT 40
#define OSPF_MIN_PKT_SIZE 256
@ -82,6 +83,9 @@
#define OSPF_VLINK_ID_OFFSET 0x80000000
#define OSPF_GR_ABLE 1
#define OSPF_GR_AWARE 2
struct ospf_config
{
struct proto_config c;
@ -97,7 +101,9 @@ struct ospf_config
u8 abr;
u8 asbr;
u8 vpn_pe;
int ecmp;
u8 gr_mode; /* Graceful restart mode (OSPF_GR_*) */
uint gr_time; /* Graceful restart interval */
uint ecmp;
list area_list; /* list of area configs (struct ospf_area_config) */
list vlink_list; /* list of configured vlinks (struct ospf_iface_patt) */
};
@ -216,6 +222,9 @@ struct ospf_proto
list area_list; /* List of OSPF areas (struct ospf_area) */
int areano; /* Number of area I belong to */
int padj; /* Number of neighbors in Exchange or Loading state */
int gr_count; /* Number of neighbors in graceful restart state */
int gr_recovery; /* Graceful restart recovery is active */
btime gr_timeout; /* The end time of grace restart recovery */
struct fib rtf; /* Routing table */
struct idm idm; /* OSPFv3 LSA ID map */
u8 ospf2; /* OSPF v2 or v3 */
@ -228,6 +237,8 @@ struct ospf_proto
u8 asbr; /* May i originate any ext/NSSA lsa? */
u8 vpn_pe; /* Should we do VPN PE specific behavior (RFC 4577)? */
u8 ecmp; /* Maximal number of nexthops in ECMP route, or 0 */
u8 gr_mode; /* Graceful restart mode (OSPF_GR_*) */
uint gr_time; /* Graceful restart interval */
u64 csn64; /* Last used cryptographic sequence number */
struct ospf_area *backbone; /* If exists */
event *flood_event; /* Event for flooding LS updates */
@ -346,6 +357,8 @@ struct ospf_neighbor
pool *pool;
struct ospf_iface *ifa;
u8 state;
u8 gr_active; /* We act as GR helper for the neighbor */
u8 got_my_rt_lsa; /* Received my Rt-LSA in DBDES exchanged */
timer *inactim; /* Inactivity timer */
u8 imms; /* I, M, Master/slave received */
u8 myimms; /* I, M Master/slave */
@ -388,6 +401,7 @@ struct ospf_neighbor
#define ACKL_DIRECT 0
#define ACKL_DELAY 1
timer *ackd_timer; /* Delayed ack timer */
timer *gr_timer; /* Graceful restart timer, non-NULL only if gr_active */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
void *ldd_buffer; /* Last database description packet */
u32 ldd_bsize; /* Buffer size for ldd_buffer */
@ -555,6 +569,7 @@ struct ospf_auth3
#define LSA_T_NSSA 0x2007
#define LSA_T_LINK 0x0008
#define LSA_T_PREFIX 0x2009
#define LSA_T_GR 0x000B
#define LSA_T_RI_ 0x000C
#define LSA_T_RI_LINK 0x800C
#define LSA_T_RI_AREA 0xA00C
@ -569,6 +584,7 @@ struct ospf_auth3
/* OSPFv2 Opaque LSA Types */
/* https://www.iana.org/assignments/ospf-opaque-types/ospf-opaque-types.xhtml#ospf-opaque-types-2 */
#define LSA_OT_GR 0x03
#define LSA_OT_RI 0x04
#define LSA_FUNCTION_MASK 0x1FFF
@ -613,6 +629,12 @@ struct ospf_auth3
#define LSA_EXT3_FBIT 0x02000000
#define LSA_EXT3_TBIT 0x01000000
/* OSPF Grace LSA (GR) TLVs */
/* https://www.iana.org/assignments/ospfv2-parameters/ospfv2-parameters.xhtml#ospfv2-parameters-13 */
#define LSA_GR_PERIOD 1
#define LSA_GR_REASON 2
#define LSA_GR_ADDRESS 3
/* OSPF Router Information (RI) TLVs */
/* https://www.iana.org/assignments/ospf-parameters/ospf-parameters.xhtml#ri-tlv */
#define LSA_RI_RIC 1
@ -959,6 +981,8 @@ static inline int oa_is_ext(struct ospf_area *oa)
/* Nonzero when the OPT_N bit is set in the area options */
static inline int
oa_is_nssa(struct ospf_area *oa)
{
  return (oa->options & OPT_N);
}
void ospf_stop_gr_recovery(struct ospf_proto *p);
void ospf_sh_neigh(struct proto *P, char *iff);
void ospf_sh(struct proto *P);
void ospf_sh_iface(struct proto *P, char *iff);
@ -990,12 +1014,18 @@ static inline struct nbma_node * find_nbma_node(struct ospf_iface *ifa, ip_addr
/* neighbor.c */
struct ospf_neighbor *ospf_neighbor_new(struct ospf_iface *ifa);
void ospf_neigh_sm(struct ospf_neighbor *n, int event);
void ospf_neigh_cancel_graceful_restart(struct ospf_neighbor *n);
void ospf_neigh_notify_grace_lsa(struct ospf_neighbor *n, struct top_hash_entry *en);
void ospf_neigh_lsadb_changed_(struct ospf_proto *p, struct top_hash_entry *en);
void ospf_dr_election(struct ospf_iface *ifa);
struct ospf_neighbor *find_neigh(struct ospf_iface *ifa, u32 rid);
struct ospf_neighbor *find_neigh_by_ip(struct ospf_iface *ifa, ip_addr ip);
void ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd);
void ospf_sh_neigh_info(struct ospf_neighbor *n);
/*
 * Cheap inline front end for ospf_neigh_lsadb_changed_(): the real handler
 * is called only when gr_count is nonzero.
 */
static inline void
ospf_neigh_lsadb_changed(struct ospf_proto *p, struct top_hash_entry *en)
{
  if (!p->gr_count)
    return;

  ospf_neigh_lsadb_changed_(p, en);
}
/* packet.c */
void ospf_pkt_fill_hdr(struct ospf_iface *ifa, void *buf, u8 h_type);
int ospf_rx_hook(sock * sk, uint size);

View file

@ -10,7 +10,7 @@
#include "ospf.h"
static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint lif, uint nif);
static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint data, uint lif, uint nif);
static void rt_sync(struct ospf_proto *p);
@ -392,6 +392,40 @@ px_pos_to_ifa(struct ospf_area *oa, int pos)
return NULL;
}
/*
 * Find the interface in area @oa whose address equals @data (taken as an
 * IPv4 address in u32 form). Interfaces without an address are skipped.
 * Returns NULL when nothing matches.
 */
static inline struct ospf_iface *
rt_find_iface2(struct ospf_area *oa, uint data)
{
  struct ospf_iface *ifa;
  ip_addr wanted = ipa_from_u32(data);

  /* We should handle it differently for unnumbered PTP links */
  WALK_LIST(ifa, oa->po->iface_list)
  {
    if (ifa->oa != oa)
      continue;

    if (!ifa->addr)
      continue;

    if (ipa_equal(ifa->addr->ip, wanted))
      return ifa;
  }

  return NULL;
}
/*
 * Find the interface in area @oa whose iface_id equals @lif.
 * Returns NULL when nothing matches.
 */
static inline struct ospf_iface *
rt_find_iface3(struct ospf_area *oa, uint lif)
{
  struct ospf_iface *ifa;

  WALK_LIST(ifa, oa->po->iface_list)
  {
    if (ifa->oa != oa)
      continue;

    if (ifa->iface_id == lif)
      return ifa;
  }

  return NULL;
}
/*
 * Find the local interface for a link of a router-LSA: by @data in OSPFv2
 * (rt_find_iface2()), by @lif otherwise (rt_find_iface3()).
 *
 * The position-based lookup rt_pos_to_ifa() sits behind a constant-false
 * condition, so it is compiled but never executed.
 */
static struct ospf_iface *
rt_find_iface(struct ospf_area *oa, int pos, uint data, uint lif)
{
  if (0)
    return rt_pos_to_ifa(oa, pos);

  if (ospf_is_v2(oa->po))
    return rt_find_iface2(oa, data);

  return rt_find_iface3(oa, lif);
}
static void
add_network(struct ospf_area *oa, net_addr *net, int metric, struct top_hash_entry *en, int pos)
@ -503,7 +537,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr
break;
}
add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.lif, rtl.nif);
add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.data, rtl.lif, rtl.nif);
}
}
@ -526,7 +560,7 @@ spfa_process_net(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_ent
for (i = 0; i < cnt; i++)
{
tmp = ospf_hash_find_rt(p->gr, oa->areaid, ln->routers[i]);
add_cand(oa, tmp, act, act->dist, -1, 0, 0);
add_cand(oa, tmp, act, act->dist, -1, 0, 0, 0);
}
}
@ -1708,7 +1742,7 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en)
static struct nexthop *
calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
struct top_hash_entry *par, int pos, uint lif, uint nif)
struct top_hash_entry *par, int pos, uint data, uint lif, uint nif)
{
struct ospf_proto *p = oa->po;
struct nexthop *pn = par->nhs;
@ -1735,7 +1769,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
/* The first case - local network */
if ((en->lsa_type == LSA_T_NET) && (par == oa->rt))
{
ifa = rt_pos_to_ifa(oa, pos);
ifa = rt_find_iface(oa, pos, data, lif);
if (!ifa)
return NULL;
@ -1748,7 +1782,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
/* The second case - ptp or ptmp neighbor */
if ((en->lsa_type == LSA_T_RT) && (par == oa->rt))
{
ifa = rt_pos_to_ifa(oa, pos);
ifa = rt_find_iface(oa, pos, data, lif);
if (!ifa)
return NULL;
@ -1838,7 +1872,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
/* Add LSA into list of candidates in Dijkstra's algorithm */
static void
add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par,
u32 dist, int pos, uint lif, uint nif)
u32 dist, int pos, uint data, uint lif, uint nif)
{
struct ospf_proto *p = oa->po;
node *prev, *n;
@ -1871,7 +1905,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
if (!link_back(oa, en, par, lif, nif))
return;
struct nexthop *nhs = calc_next_hop(oa, en, par, pos, lif, nif);
struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif);
if (!nhs)
{
log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)",
@ -2086,3 +2120,133 @@ again2:
if (en->mode == LSA_M_STALE)
ospf_flush_lsa(p, en);
}
/*
 * RFC 3623 2.2 - checking for graceful restart termination conditions
 *
 * Walks the local router-LSA of every area and, for each non-stub link,
 * checks that the referenced neighbors are fully adjacent and consistent
 * with the pre-restart LSAs. Depending on the result, graceful restart
 * recovery is either left running (some adjacency is still missing) or
 * finished by ospf_stop_gr_recovery() (success, inconsistency, or timeout).
 *
 * DROP() is a macro defined outside this function; it presumably stores its
 * arguments to err_dsc / err_val and jumps to the drop label.
 */
void
ospf_update_gr_recovery(struct ospf_proto *p)
{
  struct top_hash_entry *rt, *net, *nbr;
  struct ospf_lsa_rt_walk rtl;
  struct ospf_neighbor *n;
  struct ospf_iface *ifa;
  struct ospf_area *oa;
  const char *err_dsc = NULL;
  uint i, j, missing = 0, err_val = 0;

  /*
   * We check here for three cases:
   * RFC 3623 2.2 (1) - success when all adjacencies are established
   * RFC 3623 2.2 (2) - failure when inconsistent LSA was received
   * RFC 3623 2.2 (3) - grace period timeout
   *
   * It is handled by processing pre-restart local router-LSA and adjacent
   * network-LSAs, checking neighbor association for referenced routers (1)
   * and checking back links from their router-LSAs (2).
   *
   * TODO: Use timer for grace period timeout. We avoided that as function
   * ospf_stop_gr_recovery() called from ospf_disp() makes ending of graceful
   * restart uninterrupted by other events.
   */

  /* Count a not-yet-established adjacency and move on to the next item */
#define CONTINUE { missing++; continue; }

  if (current_time() > p->gr_timeout)
    goto timeout;

  WALK_LIST(oa, p->area_list)
  {
    /* Get the router-LSA */
    rt = oa->rt;
    if (!rt || (rt->lsa.age == LSA_MAXAGE))
      CONTINUE;

    /* i tracks the position of the link within the router-LSA */
    for (lsa_walk_rt_init(p, rt, &rtl), i = 0; lsa_walk_rt(&rtl); i++)
    {
      if (rtl.type == LSART_STUB)
	continue;

      ifa = rt_find_iface(oa, i, rtl.data, rtl.lif);
      if (!ifa)
	DROP("inconsistent interface", ospf_is_v2(p) ? rtl.data : rtl.lif);

      switch (rtl.type)
      {
      case LSART_NET:
	/* Find the network-LSA */
	net = ospf_hash_find_net(p->gr, oa->areaid, rtl.id, rtl.nif);
	if (!net)
	  CONTINUE;

	if (!link_back(oa, net, rt, rtl.lif, rtl.nif))
	  DROP("Inconsistent network-LSA", net->lsa.id);

	if (ifa->state == OSPF_IS_DR)
	{
	  /* Find all neighbors from the network-LSA */
	  struct ospf_lsa_net *net_body = net->lsa_body;
	  uint cnt = lsa_net_count(&net->lsa);

	  /*
	   * The loop must advance j, not i: stepping i would never end the
	   * loop and would also corrupt the outer link position counter.
	   */
	  for (j = 0; j < cnt; j++)
	  {
	    n = find_neigh(ifa, net_body->routers[j]);
	    if (!n || (n->state != NEIGHBOR_FULL))
	      CONTINUE;

	    if (!n->got_my_rt_lsa)
	      DROP("not received my router-LSA", n->rid);

	    nbr = ospf_hash_find_rt(p->gr, oa->areaid, n->rid);
	    if (!link_back(oa, nbr, net, 0, 0))
	      DROP("inconsistent router-LSA", n->rid);
	  }
	}
	else
	{
	  /* Find the DR (by IP for OSPFv2) */
	  n = ospf_is_v2(p) ?
	    find_neigh_by_ip(ifa, ipa_from_u32(rtl.id)) :
	    find_neigh(ifa, rtl.id);
	  if (!n || (n->state != NEIGHBOR_FULL))
	    CONTINUE;

	  if (!n->got_my_rt_lsa)
	    DROP("not received my router-LSA", n->rid);
	}
	break;

      case LSART_VLNK:
      case LSART_PTP:
	/* Find the PtP peer */
	n = find_neigh(ifa, rtl.id);
	if (!n || (n->state != NEIGHBOR_FULL))
	  CONTINUE;

	if (!n->got_my_rt_lsa)
	  DROP("not received my router-LSA", n->rid);

	nbr = ospf_hash_find_rt(p->gr, oa->areaid, rtl.id);
	if (!link_back(oa, nbr, rt, rtl.lif, rtl.nif))
	  DROP("inconsistent router-LSA", rtl.id);
      }
    }
  }

#undef CONTINUE

  /* Some adjacency is still missing - keep waiting */
  if (missing)
    return;

  OSPF_TRACE(D_EVENTS, "Graceful restart finished");
  ospf_stop_gr_recovery(p);
  return;

drop:
  log(L_INFO "%s: Graceful restart ended - %s (%R)", p->p.name, err_dsc, err_val);
  ospf_stop_gr_recovery(p);
  return;

timeout:
  log(L_INFO "%s: Graceful restart ended - grace period expired", p->p.name);
  ospf_stop_gr_recovery(p);
  return;
}

View file

@ -130,6 +130,7 @@ static inline int rt_is_nssa(ort *nf)
void ospf_rt_spf(struct ospf_proto *p);
void ospf_rt_initort(struct fib_node *fn);
void ospf_update_gr_recovery(struct ospf_proto *p);
#endif /* _BIRD_OSPF_RT_H_ */

View file

@ -83,7 +83,10 @@ ospf_install_lsa(struct ospf_proto *p, struct ospf_lsa_header *lsa, u32 type, u3
en->lsa_type, en->lsa.id, en->lsa.rt, en->lsa.sn, en->lsa.age);
if (change)
{
ospf_neigh_lsadb_changed(p, en);
ospf_schedule_rtcalc(p);
}
return en;
}
@ -243,6 +246,7 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa
en->lsa.age = 0;
en->init_age = 0;
en->inst_time = current_time();
en->dirty = 0;
lsa_generate_checksum(&en->lsa, en->lsa_body);
OSPF_TRACE(D_EVENTS, "Originating LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x",
@ -251,7 +255,10 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa
ospf_flood_lsa(p, en, NULL);
if (en->mode == LSA_M_BASIC)
{
ospf_neigh_lsadb_changed(p, en);
ospf_schedule_rtcalc(p);
}
return 1;
}
@ -321,7 +328,8 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa)
if ((en->lsa.age < LSA_MAXAGE) &&
(lsa_length == en->lsa.length) &&
!memcmp(lsa_body, en->lsa_body, lsa_blen) &&
(!ospf_is_v2(p) || (lsa->opts == lsa_get_options(&en->lsa))))
(!ospf_is_v2(p) || (lsa->opts == lsa_get_options(&en->lsa))) &&
!en->dirty)
goto drop;
lsa_body = lsab_flush(p);
@ -433,7 +441,10 @@ ospf_flush_lsa(struct ospf_proto *p, struct top_hash_entry *en)
ospf_flood_lsa(p, en, NULL);
if (en->mode == LSA_M_BASIC)
{
ospf_neigh_lsadb_changed(p, en);
ospf_schedule_rtcalc(p);
}
en->mode = LSA_M_BASIC;
}
@ -509,6 +520,12 @@ ospf_update_lsadb(struct ospf_proto *p)
continue;
}
if (en->dirty)
{
ospf_flush_lsa(p, en);
continue;
}
if ((en->lsa.rt == p->router_id) && (real_age >= LSREFRESHTIME))
{
ospf_refresh_lsa(p, en);
@ -525,6 +542,16 @@ ospf_update_lsadb(struct ospf_proto *p)
}
}
void
ospf_mark_lsadb(struct ospf_proto *p)
{
struct top_hash_entry *en;
/* Mark all local LSAs as dirty */
WALK_SLIST(en, p->lsal)
if (en->lsa.rt == p->router_id)
en->dirty = 1;
}
static u32
ort_to_lsaid(struct ospf_proto *p, ort *nf)
@ -1424,6 +1451,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
struct ospf_config *cf = (struct ospf_config *) (p->p.cf);
struct ospf_iface *ifa;
struct ospf_lsa_prefix *lp;
uint max = ospf_is_ip4(p) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH;
int host_addr = 0;
int net_lsa;
int i = 0;
@ -1457,7 +1485,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
(a->scope <= SCOPE_LINK))
continue;
if (((a->prefix.pxlen < IP6_MAX_PREFIX_LENGTH) && net_lsa) ||
if (((a->prefix.pxlen < max) && net_lsa) ||
configured_stubnet(oa, a))
continue;
@ -1465,8 +1493,13 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
(ifa->state == OSPF_IS_LOOP) ||
(ifa->type == OSPF_IT_PTMP))
{
net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH);
lsab_put_prefix(p, (net_addr *) &net, 0);
net_addr net;
if (a->prefix.type == NET_IP4)
net_fill_ip4(&net, ipa_to_ip4(a->ip), IP4_MAX_PREFIX_LENGTH);
else
net_fill_ip6(&net, ipa_to_ip6(a->ip), IP6_MAX_PREFIX_LENGTH);
lsab_put_prefix(p, &net, 0);
host_addr = 1;
}
else
@ -1482,7 +1515,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
if (!sn->hidden)
{
lsab_put_prefix(p, &sn->prefix, sn->cost);
if (sn->prefix.pxlen == IP6_MAX_PREFIX_LENGTH)
if (sn->prefix.pxlen == max)
host_addr = 1;
i++;
}
@ -1669,6 +1702,59 @@ ospf_originate_prefix_net_lsa(struct ospf_proto *p, struct ospf_iface *ifa)
}
/*
* Grace LSA handling
* Type = LSA_T_GR, opaque type = LSA_OT_GR
*/
/* Append a LSA_GR_PERIOD TLV carrying @period (one u32 word, length 4) to the LSA buffer */
static inline void
ospf_add_gr_period_tlv(struct ospf_proto *p, uint period)
{
  struct ospf_tlv *period_tlv;

  /* TLV header followed by a single u32 data word, zero-initialized */
  period_tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32));

  period_tlv->type = LSA_GR_PERIOD;
  period_tlv->length = 4;
  period_tlv->data[0] = period;
}
/*
 * Append a LSA_GR_REASON TLV to the LSA buffer. The TLV length is 1 and
 * @reason is stored in the top byte of the single u32 data word.
 */
static inline void
ospf_add_gr_reason_tlv(struct ospf_proto *p, uint reason)
{
  struct ospf_tlv *reason_tlv;

  /* TLV header followed by a single u32 data word, zero-initialized */
  reason_tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32));

  reason_tlv->type = LSA_GR_REASON;
  reason_tlv->length = 1;
  reason_tlv->data[0] = reason << 24;
}
/* Append a LSA_GR_ADDRESS TLV carrying the IPv4 address @addr (as u32, length 4) to the LSA buffer */
static inline void
ospf_add_gr_address_tlv(struct ospf_proto *p, ip4_addr addr)
{
  struct ospf_tlv *addr_tlv;

  /* TLV header followed by a single u32 data word, zero-initialized */
  addr_tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32));

  addr_tlv->type = LSA_GR_ADDRESS;
  addr_tlv->length = 4;
  addr_tlv->data[0] = ip4_to_u32(addr);
}
void
ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa)
{
struct ospf_new_lsa lsa = {
.type = LSA_T_GR,
.dom = ifa->iface_id,
.id = ospf_is_v2(p) ? 0 : ifa->iface_id,
.ifa = ifa
};
ospf_add_gr_period_tlv(p, p->gr_time);
ospf_add_gr_reason_tlv(p, 0);
uint t = ifa->type;
if (ospf_is_v2(p) && ((t == OSPF_IT_BCAST) || (t == OSPF_IT_NBMA) || (t == OSPF_IT_PTMP)))
ospf_add_gr_address_tlv(p, ipa_to_ip4(ifa->addr->ip));
ospf_originate_lsa(p, &lsa);
}
/*
* Router Information LSA handling
* Type = LSA_T_RI_AREA, opaque type = LSA_OT_RI
@ -1712,6 +1798,10 @@ ospf_update_topology(struct ospf_proto *p)
struct ospf_area *oa;
struct ospf_iface *ifa;
/* No LSA reorigination during GR recovery */
if (p->gr_recovery)
return;
WALK_LIST(oa, p->area_list)
{
if (oa->update_rt_lsa)

View file

@ -33,6 +33,7 @@ struct top_hash_entry
u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */
u32 dist; /* Distance from the root */
int ret_count; /* Number of retransmission lists referencing the entry */
u8 dirty; /* Will be flushed during next LSAdb update unless reoriginated*/
u8 color;
#define OUTSPF 0
#define CANDIDATE 1
@ -180,6 +181,7 @@ struct top_hash_entry * ospf_originate_lsa(struct ospf_proto *p, struct ospf_new
void ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_lsa_header *lsa, u32 type, u32 domain, void *body);
void ospf_flush_lsa(struct ospf_proto *p, struct top_hash_entry *en);
void ospf_update_lsadb(struct ospf_proto *p);
void ospf_mark_lsadb(struct ospf_proto *p);
/* Flush the LSA referenced by *en (if any) and reset the reference to NULL */
static inline void
ospf_flush2_lsa(struct ospf_proto *p, struct top_hash_entry **en)
{
  if (!*en)
    return;

  ospf_flush_lsa(p, *en);
  *en = NULL;
}
@ -187,6 +189,7 @@ static inline void ospf_flush2_lsa(struct ospf_proto *p, struct top_hash_entry *
void ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric);
void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options);
void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit, int dn);
void ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa);
void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old);
void ospf_update_topology(struct ospf_proto *p);

View file

@ -18,7 +18,7 @@ static struct log_config *this_log;
CF_DECLS
CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT)
CF_KEYWORDS(NAME, CONFIRM, UNDO, CHECK, TIMEOUT, DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING)
CF_KEYWORDS(NAME, CONFIRM, UNDO, CHECK, TIMEOUT, DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING, STATUS)
%type <i> log_mask log_mask_list log_cat cfg_timeout
%type <t> cfg_name
@ -124,12 +124,19 @@ CF_CLI(CONFIGURE CONFIRM,,, [[Confirm last configuration change - deactivate und
CF_CLI(CONFIGURE UNDO,,, [[Undo last configuration change]])
{ cmd_reconfig_undo(); } ;
CF_CLI(CONFIGURE STATUS,,, [[Show configuration status]])
{ cmd_reconfig_status(); } ;
CF_CLI(CONFIGURE CHECK, cfg_name, [\"<file>\"], [[Parse configuration and check its validity]])
{ cmd_check_config($3); } ;
CF_CLI(DOWN,,, [[Shut the daemon down]])
{ cmd_shutdown(); } ;
CF_CLI(GRACEFUL DOWN,,, [[Shut the daemon down for graceful restart]])
{ cmd_graceful_restart(); } ;
cfg_name:
/* empty */ { $$ = NULL; }
| TEXT

View file

@ -1082,6 +1082,7 @@ sk_passive_connected(sock *s, int type)
t->fd = fd;
t->ttl = s->ttl;
t->tos = s->tos;
t->vrf = s->vrf;
t->rbsize = s->rbsize;
t->tbsize = s->tbsize;

View file

@ -1129,7 +1129,7 @@ krt_shutdown(struct proto *P)
krt_scan_timer_stop(p);
/* FIXME we should flush routes even when persist during reconfiguration */
if (p->initialized && !KRT_CF->persist)
if (p->initialized && !KRT_CF->persist && (P->down_code != PDC_CMD_GR_DOWN))
krt_flush_routes(p);
p->ready = 0;

View file

@ -338,6 +338,28 @@ cmd_reconfig_undo(void)
cmd_reconfig_msg(r);
}
/*
 * CLI command handler: report the configuration status.
 *
 * Prints one line according to config_status() and, when
 * config_timer_status() returns a non-negative value, a line with the time
 * remaining until the configuration is undone. The reply is closed by an
 * empty message with code 0.
 */
void
cmd_reconfig_status(void)
{
  const int status = config_status();
  const btime undo_in = config_timer_status();

  if (status == CONF_DONE)
    cli_msg(-3, "Daemon is up and running");
  else if (status == CONF_PROGRESS)
    cli_msg(-4, "Reconfiguration in progress");
  else if (status == CONF_QUEUED)
    cli_msg(-5, "Reconfiguration in progress, next one enqueued");
  else if (status == CONF_SHUTDOWN)
    cli_msg(-6, "Shutdown in progress");

  if (undo_in >= 0)
    cli_msg(-22, "Configuration unconfirmed, undo in %t s", undo_in);

  cli_msg(0, "");
}
/*
* Command-Line Interface
*/
@ -542,14 +564,14 @@ cmd_shutdown(void)
return;
cli_msg(7, "Shutdown requested");
order_shutdown();
order_shutdown(0);
}
void
async_shutdown(void)
{
DBG("Shutting down...\n");
order_shutdown();
order_shutdown(0);
}
void
@ -561,6 +583,17 @@ sysdep_shutdown_done(void)
exit(0);
}
/*
 * CLI command handler: shut the daemon down for graceful restart.
 * Does nothing when cli_access_restricted() returns nonzero.
 */
void
cmd_graceful_restart(void)
{
  if (!cli_access_restricted())
  {
    cli_msg(25, "Graceful restart requested");
    order_shutdown(1);
  }
}
/*
* Signals
*/

View file

@ -26,7 +26,9 @@ void cmd_check_config(char *name);
void cmd_reconfig(char *name, int type, uint timeout);
void cmd_reconfig_confirm(void);
void cmd_reconfig_undo(void);
void cmd_reconfig_status(void);
void cmd_shutdown(void);
void cmd_graceful_restart(void);
#define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300

View file

@ -492,6 +492,8 @@ void cmd_check_config(char *name UNUSED) {}
/*
 * Empty (no-op) definitions of the command handlers below. Each takes its
 * arguments, marked UNUSED, and does nothing.
 */
void cmd_reconfig(char *name UNUSED, int type UNUSED, int timeout UNUSED) {}
void cmd_reconfig_confirm(void) {}
void cmd_reconfig_undo(void) {}
void cmd_reconfig_status(void) {}
void cmd_graceful_restart(void) {}
void cmd_shutdown(void) {}
void cmd_reconfig_undo_notify(void) {}