Multipath support for OSPF

This commit is contained in:
Ondrej Zajicek 2010-12-07 23:35:39 +01:00
parent 9852f81064
commit 57c574d82a
8 changed files with 443 additions and 234 deletions

View file

@ -51,7 +51,7 @@ CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, NONBROADCAST, POINTOPOINT, TYPE)
CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC) CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC)
CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK) CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK)
CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY) CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY)
CF_KEYWORDS(WAIT, DELAY, LSADB) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT)
%type <t> opttext %type <t> opttext
@ -76,7 +76,9 @@ ospf_proto:
ospf_proto_item: ospf_proto_item:
proto_item proto_item
| RFC1583COMPAT bool { OSPF_CFG->rfc1583 = $2; } | RFC1583COMPAT bool { OSPF_CFG->rfc1583 = $2; }
| TICK expr { OSPF_CFG->tick = $2 ; if($2<=0) cf_error("Tick must be greater than zero"); } | ECMP bool { OSPF_CFG->ecmp = $2 ? DEFAULT_ECMP_LIMIT : 0; }
| ECMP bool LIMIT expr {
    /*
     * The configured limit is later copied into the `byte ecmp` field of
     * struct proto_ospf, so reject values that would not survive that
     * narrowing instead of silently truncating them.
     */
    OSPF_CFG->ecmp = $2 ? $4 : 0;
    if ($4 < 0) cf_error("ECMP limit cannot be negative");
    if ($4 > 255) cf_error("ECMP limit cannot exceed 255");
  }
| TICK expr { OSPF_CFG->tick = $2; if($2<=0) cf_error("Tick must be greater than zero"); }
| ospf_area '}' | ospf_area '}'
; ;
@ -193,6 +195,7 @@ ospf_iface_item:
| STRICT NONBROADCAST bool { OSPF_PATT->strictnbma = $3 ; } | STRICT NONBROADCAST bool { OSPF_PATT->strictnbma = $3 ; }
| STUB bool { OSPF_PATT->stub = $2 ; } | STUB bool { OSPF_PATT->stub = $2 ; }
| CHECK LINK bool { OSPF_PATT->check_link = $3; } | CHECK LINK bool { OSPF_PATT->check_link = $3; }
| ECMP WEIGHT expr { OSPF_PATT->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); }
| NEIGHBORS '{' ipa_list '}' | NEIGHBORS '{' ipa_list '}'
| AUTHENTICATION NONE { OSPF_PATT->autype = OSPF_AUTH_NONE ; } | AUTHENTICATION NONE { OSPF_PATT->autype = OSPF_AUTH_NONE ; }
| AUTHENTICATION SIMPLE { OSPF_PATT->autype = OSPF_AUTH_SIMPLE ; } | AUTHENTICATION SIMPLE { OSPF_PATT->autype = OSPF_AUTH_SIMPLE ; }

View file

@ -436,6 +436,7 @@ ospf_iface_new(struct proto_ospf *po, struct iface *iface, struct ifa *addr,
ifa->ioprob = OSPF_I_OK; ifa->ioprob = OSPF_I_OK;
ifa->rxbuf = ip->rxbuf; ifa->rxbuf = ip->rxbuf;
ifa->check_link = ip->check_link; ifa->check_link = ip->check_link;
ifa->ecmp_weight = ip->ecmp_weight;
#ifdef OSPFv2 #ifdef OSPFv2
ifa->autype = ip->autype; ifa->autype = ip->autype;
@ -795,6 +796,8 @@ ospf_iface_info(struct ospf_iface *ifa)
ifa->stub ? "(stub)" : ""); ifa->stub ? "(stub)" : "");
cli_msg(-1015, "\tPriority: %u", ifa->priority); cli_msg(-1015, "\tPriority: %u", ifa->priority);
cli_msg(-1015, "\tCost: %u", ifa->cost); cli_msg(-1015, "\tCost: %u", ifa->cost);
if (ifa->oa->po->ecmp)
cli_msg(-1015, "\tECMP weight: %d", ((int) ifa->ecmp_weight) + 1);
cli_msg(-1015, "\tHello timer: %u", ifa->helloint); cli_msg(-1015, "\tHello timer: %u", ifa->helloint);
if (ifa->type == OSPF_IT_NBMA) if (ifa->type == OSPF_IT_NBMA)

View file

@ -147,6 +147,7 @@ ospf_start(struct proto *p)
po->router_id = proto_get_router_id(p->cf); po->router_id = proto_get_router_id(p->cf);
po->rfc1583 = c->rfc1583; po->rfc1583 = c->rfc1583;
po->ebit = 0; po->ebit = 0;
po->ecmp = c->ecmp;
po->tick = c->tick; po->tick = c->tick;
po->disp_timer = tm_new(p->pool); po->disp_timer = tm_new(p->pool);
po->disp_timer->data = po; po->disp_timer->data = po;
@ -157,6 +158,7 @@ ospf_start(struct proto *p)
po->lsab_size = 256; po->lsab_size = 256;
po->lsab_used = 0; po->lsab_used = 0;
po->lsab = mb_alloc(p->pool, po->lsab_size); po->lsab = mb_alloc(p->pool, po->lsab_size);
po->nhpool = lp_new(p->pool, 12*sizeof(struct mpnh));
init_list(&(po->iface_list)); init_list(&(po->iface_list));
init_list(&(po->area_list)); init_list(&(po->area_list));
fib_init(&po->rtf, p->pool, sizeof(ort), 0, ospf_rt_initort); fib_init(&po->rtf, p->pool, sizeof(ort), 0, ospf_rt_initort);
@ -514,6 +516,13 @@ ospf_shutdown(struct proto *p)
if (ifa->state > OSPF_IS_DOWN) if (ifa->state > OSPF_IS_DOWN)
ospf_iface_shutdown(ifa); ospf_iface_shutdown(ifa);
/* Cleanup locked rta entries */
FIB_WALK(&po->rtf, nftmp)
{
rta_free(((ort *) nftmp)->old_rta);
}
FIB_WALK_END;
return PS_DOWN; return PS_DOWN;
} }
@ -648,6 +657,7 @@ ospf_reconfigure(struct proto *p, struct proto_config *c)
schedule_rtcalc(po); schedule_rtcalc(po);
po->tick = new->tick; po->tick = new->tick;
po->ecmp = new->ecmp;
po->disp_timer->recurrent = po->tick; po->disp_timer->recurrent = po->tick;
tm_start(po->disp_timer, 1); tm_start(po->disp_timer, 1);
@ -767,6 +777,14 @@ ospf_reconfigure(struct proto *p, struct proto_config *c)
ospf_iface_sm(ifa, ifa->check_link ? ISM_LOOP : ISM_UNLOOP); ospf_iface_sm(ifa, ifa->check_link ? ISM_LOOP : ISM_UNLOOP);
} }
/* ECMP weight */
if (oldip->ecmp_weight != newip->ecmp_weight)
{
ifa->ecmp_weight = newip->ecmp_weight;
OSPF_TRACE(D_EVENTS, "Changing ECMP weight of interface %s from %d to %d",
ifa->iface->name, (int)oldip->ecmp_weight + 1, (int)newip->ecmp_weight + 1);
}
/* strict nbma */ /* strict nbma */
if ((oldip->strictnbma == 0) && (newip->strictnbma != 0)) if ((oldip->strictnbma == 0) && (newip->strictnbma != 0))
{ {

View file

@ -74,6 +74,7 @@ do { if ((p->debug & D_PACKETS) || OSPF_FORCE_DEBUG) \
#define DEFAULT_OSPFTICK 1 #define DEFAULT_OSPFTICK 1
#define DEFAULT_RFC1583 0 /* compatibility with rfc1583 */ #define DEFAULT_RFC1583 0 /* compatibility with rfc1583 */
#define DEFAULT_STUB_COST 1000 #define DEFAULT_STUB_COST 1000
#define DEFAULT_ECMP_LIMIT 16
struct ospf_config struct ospf_config
@ -81,6 +82,7 @@ struct ospf_config
struct proto_config c; struct proto_config c;
unsigned tick; unsigned tick;
int rfc1583; int rfc1583;
int ecmp;
list area_list; list area_list;
}; };
@ -247,6 +249,7 @@ struct ospf_iface
u8 sk_dr; /* Socket is a member of DRouters group */ u8 sk_dr; /* Socket is a member of DRouters group */
u16 rxbuf; /* Buffer size */ u16 rxbuf; /* Buffer size */
u8 check_link; /* Whether iface link change is used */ u8 check_link; /* Whether iface link change is used */
u8 ecmp_weight; /* Weight used for ECMP */
}; };
struct ospf_md5 struct ospf_md5
@ -730,11 +733,13 @@ struct proto_ospf
list area_list; list area_list;
int areano; /* Number of area I belong to */ int areano; /* Number of area I belong to */
struct fib rtf; /* Routing table */ struct fib rtf; /* Routing table */
int rfc1583; /* RFC1583 compatibility */ byte rfc1583; /* RFC1583 compatibility */
int ebit; /* Did I originate any ext lsa? */ byte ebit; /* Did I originate any ext lsa? */
byte ecmp; /* Maximal number of nexthops in ECMP route, or 0 */
struct ospf_area *backbone; /* If exists */ struct ospf_area *backbone; /* If exists */
void *lsab; /* LSA buffer used when originating router LSAs */ void *lsab; /* LSA buffer used when originating router LSAs */
int lsab_size, lsab_used; int lsab_size, lsab_used;
linpool *nhpool; /* Linpool used for next hops computed in SPF */
u32 router_id; u32 router_id;
}; };
@ -756,6 +761,7 @@ struct ospf_iface_patt
u32 vid; u32 vid;
u16 rxbuf; u16 rxbuf;
u8 check_link; u8 check_link;
u8 ecmp_weight;
#define OSPF_RXBUF_NORMAL 0 #define OSPF_RXBUF_NORMAL 0
#define OSPF_RXBUF_LARGE 1 #define OSPF_RXBUF_LARGE 1
#define OSPF_RXBUF_MINSIZE 256 /* Minimal allowed size */ #define OSPF_RXBUF_MINSIZE 256 /* Minimal allowed size */

View file

@ -10,10 +10,7 @@
static void add_cand(list * l, struct top_hash_entry *en, static void add_cand(list * l, struct top_hash_entry *en,
struct top_hash_entry *par, u32 dist, struct top_hash_entry *par, u32 dist,
struct ospf_area *oa); struct ospf_area *oa, struct ospf_lsa_rt_link *rtl);
static int calc_next_hop(struct ospf_area *oa,
struct top_hash_entry *en,
struct top_hash_entry *par);
static void rt_sync(struct proto_ospf *po); static void rt_sync(struct proto_ospf *po);
/* In ospf_area->rtr we store paths to routers, but we use RID (and not IP address) /* In ospf_area->rtr we store paths to routers, but we use RID (and not IP address)
@ -25,20 +22,48 @@ static void rt_sync(struct proto_ospf *po);
#endif #endif
static inline void reset_ri(orta * orta) static inline void reset_ri(ort *ort)
{ {
bzero(orta, sizeof(orta)); bzero(&ort->n, sizeof(orta));
} }
void void
ospf_rt_initort(struct fib_node *fn) ospf_rt_initort(struct fib_node *fn)
{ {
ort *ri = (ort *) fn; ort *ri = (ort *) fn;
reset_ri(&ri->n); reset_ri(ri);
reset_ri(&ri->o); ri->old_rta = NULL;
ri->fn.x0 = 0; ri->fn.x0 = 0;
} }
/* Return nonzero when the nexthop list exists but its first entry carries
   no interface, which is how dummy virtual-link nexthops are represented. */
static inline int
unresolved_vlink(struct mpnh *nhs)
{
  if (!nhs)
    return 0;

  return !nhs->iface;
}
/* Allocate one unlinked nexthop from the protocol's nexthop linpool
   (po->nhpool) and fill in the given gateway, interface and weight. */
static inline struct mpnh *
new_nexthop(struct proto_ospf *po, ip_addr gw, struct iface *iface, unsigned char weight)
{
  struct mpnh *hop = lp_alloc(po->nhpool, sizeof(struct mpnh));

  hop->next = NULL;
  hop->weight = weight;
  hop->iface = iface;
  hop->gw = gw;

  return hop;
}
/* Duplicate a single nexthop into po->nhpool. Gateway, interface and weight
   are copied from src; the copy is not linked to any successor. */
static inline struct mpnh *
copy_nexthop(struct proto_ospf *po, struct mpnh *src)
{
  struct mpnh *dup = lp_alloc(po->nhpool, sizeof(struct mpnh));

  dup->next = NULL;
  dup->weight = src->weight;
  dup->iface = src->iface;
  dup->gw = src->gw;

  return dup;
}
/* If new is better return 1 */ /* If new is better return 1 */
static int static int
@ -234,8 +259,7 @@ add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_
.tag = 0, .tag = 0,
.rid = en->lsa.rt, .rid = en->lsa.rt,
.oa = oa, .oa = oa,
.ifa = en->nhi, .nhs = en->nhs
.nh = en->nh
}; };
if (en == oa->rt) if (en == oa->rt)
@ -248,8 +272,8 @@ add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_
* be removed in rt_sync(). * be removed in rt_sync().
*/ */
nf.ifa = find_stub_src(oa, px, pxlen); struct ospf_iface *ifa = find_stub_src(oa, px, pxlen);
nf.nh = IPA_NONE; nf.nhs = ifa ? new_nexthop(oa->po, IPA_NONE, ifa->iface, ifa->ecmp_weight) : NULL;
} }
ri_install_net(oa->po, px, pxlen, &nf); ri_install_net(oa->po, px, pxlen, &nf);
@ -372,7 +396,7 @@ ospf_rt_spfa_rtlinks(struct ospf_area *oa, struct top_hash_entry *act, struct to
if (tmp) if (tmp)
DBG("Going to add cand, Mydist: %u, Req: %u\n", DBG("Going to add cand, Mydist: %u, Req: %u\n",
tmp->dist, act->dist + rtl->metric); tmp->dist, act->dist + rtl->metric);
add_cand(&oa->cand, tmp, act, act->dist + rtl->metric, oa); add_cand(&oa->cand, tmp, act, act->dist + rtl->metric, oa, rtl);
} }
} }
@ -439,8 +463,7 @@ ospf_rt_spfa(struct ospf_area *oa)
.tag = 0, .tag = 0,
.rid = act->lsa.rt, .rid = act->lsa.rt,
.oa = oa, .oa = oa,
.ifa = act->nhi, .nhs = act->nhs
.nh = act->nh
}; };
ri_install_rt(oa, act->lsa.rt, &nf); ri_install_rt(oa, act->lsa.rt, &nf);
} }
@ -471,7 +494,7 @@ ospf_rt_spfa(struct ospf_area *oa)
DBG("Found :-)\n"); DBG("Found :-)\n");
else else
DBG("Not found!\n"); DBG("Not found!\n");
add_cand(&oa->cand, tmp, act, act->dist, oa); add_cand(&oa->cand, tmp, act, act->dist, oa, NULL);
} }
break; break;
} }
@ -661,8 +684,7 @@ ospf_rt_sum(struct ospf_area *oa)
.tag = 0, .tag = 0,
.rid = en->lsa.rt, /* ABR ID */ .rid = en->lsa.rt, /* ABR ID */
.oa = oa, .oa = oa,
.ifa = abr->n.ifa, .nhs = abr->n.nhs
.nh = abr->n.nh
}; };
if (type == ORT_NET) if (type == ORT_NET)
@ -762,13 +784,18 @@ ospf_rt_sum_tr(struct ospf_area *oa)
metric = abr->n.metric1 + metric; /* IAC */ metric = abr->n.metric1 + metric; /* IAC */
/* 16.3. (5) */ /* 16.3. (5) */
if (metric <= re->n.metric1) if ((metric < re->n.metric1) ||
((metric == re->n.metric1) && unresolved_vlink(re->n.nhs)))
{ {
/* We want to replace the next-hop even if the metric is equal /* We want to replace the next-hop even if the metric is equal
to replace a virtual next-hop through vlink with a real one */ to replace a virtual next-hop through vlink with a real one.
Proper ECMP would merge nexthops here, but we do not do that.
We restrict nexthops to fit one area to simplify check
12.4.3 p4 in decide_sum_lsa() */
re->n.metric1 = metric; re->n.metric1 = metric;
re->n.nh = abr->n.nh; re->n.voa = oa;
re->n.ifa = abr->n.ifa; re->n.nhs = abr->n.nhs;
} }
} }
} }
@ -811,7 +838,7 @@ decide_sum_lsa(struct ospf_area *oa, ort *nf, int dest)
return 0; return 0;
/* 12.4.3 p4 */ /* 12.4.3 p4 */
if (nf->n.ifa && (nf->n.ifa->oa->areaid == oa->areaid)) if (nf->n.voa && (nf->n.voa->areaid == oa->areaid))
return 0; return 0;
/* 12.4.3 p5 */ /* 12.4.3 p5 */
@ -912,18 +939,20 @@ ospf_check_vlinks(struct proto_ospf *po)
struct top_hash_entry *tmp; struct top_hash_entry *tmp;
tmp = ospf_hash_find_rt(po->gr, iface->voa->areaid, iface->vid); tmp = ospf_hash_find_rt(po->gr, iface->voa->areaid, iface->vid);
if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb)) if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb) && tmp->nhs)
{ {
struct ospf_iface *nhi = ospf_iface_find(po, tmp->nhs->iface);
if ((iface->state != OSPF_IS_PTP) if ((iface->state != OSPF_IS_PTP)
|| (iface->vifa != tmp->nhi) || (iface->vifa != nhi)
|| !ipa_equal(iface->vip, tmp->lb)) || !ipa_equal(iface->vip, tmp->lb))
{ {
OSPF_TRACE(D_EVENTS, "Vlink peer %R found", tmp->lsa.id); OSPF_TRACE(D_EVENTS, "Vlink peer %R found", tmp->lsa.id);
ospf_iface_sm(iface, ISM_DOWN); ospf_iface_sm(iface, ISM_DOWN);
iface->vifa = tmp->nhi; iface->vifa = nhi;
iface->iface = tmp->nhi->iface; iface->iface = nhi->iface;
iface->addr = tmp->nhi->addr; iface->addr = nhi->addr;
iface->sk = tmp->nhi->sk; iface->sk = nhi->sk;
iface->cost = tmp->dist; iface->cost = tmp->dist;
iface->vip = tmp->lb; iface->vip = tmp->lb;
ospf_iface_sm(iface, ISM_UP); ospf_iface_sm(iface, ISM_UP);
@ -959,8 +988,8 @@ ospf_rt_abr(struct proto_ospf *po)
/* RFC 2328 G.3 - incomplete resolution of virtual next hops */ /* RFC 2328 G.3 - incomplete resolution of virtual next hops */
if (nf->n.type && nf->n.ifa && (nf->n.ifa->type == OSPF_IT_VLINK)) if (nf->n.type && unresolved_vlink(nf->n.nhs))
reset_ri(&nf->n); reset_ri(nf);
/* Compute condensed area networks */ /* Compute condensed area networks */
@ -979,7 +1008,7 @@ ospf_rt_abr(struct proto_ospf *po)
/* 16.2. (3) */ /* 16.2. (3) */
if (nfi->n.type == RTS_OSPF_IA) if (nfi->n.type == RTS_OSPF_IA)
reset_ri(&nfi->n); reset_ri(nfi);
} }
if (anet->metric < nf->n.metric1) if (anet->metric < nf->n.metric1)
@ -1055,10 +1084,10 @@ ospf_ext_spf(struct proto_ospf *po)
struct proto *p = &po->proto; struct proto *p = &po->proto;
struct ospf_lsa_ext *le; struct ospf_lsa_ext *le;
int pxlen, ebit, rt_fwaddr_valid; int pxlen, ebit, rt_fwaddr_valid;
ip_addr ip, nh, rtid, rt_fwaddr; ip_addr ip, rtid, rt_fwaddr;
struct ospf_iface *nhi = NULL;
u32 br_metric, rt_metric, rt_tag; u32 br_metric, rt_metric, rt_tag;
struct ospf_area *atmp; struct ospf_area *atmp;
struct mpnh* nhs = NULL;
OSPF_TRACE(D_EVENTS, "Starting routing table calculation for ext routes"); OSPF_TRACE(D_EVENTS, "Starting routing table calculation for ext routes");
@ -1119,8 +1148,6 @@ ospf_ext_spf(struct proto_ospf *po)
p->name, en->lsa.type, en->lsa.id, en->lsa.rt); p->name, en->lsa.type, en->lsa.id, en->lsa.rt);
continue; continue;
} }
nhi = NULL;
nh = IPA_NONE;
/* 16.4. (3) */ /* 16.4. (3) */
/* If there are more areas, we already precomputed preferred ASBR entries /* If there are more areas, we already precomputed preferred ASBR entries
@ -1138,8 +1165,7 @@ ospf_ext_spf(struct proto_ospf *po)
if (!rt_fwaddr_valid) if (!rt_fwaddr_valid)
{ {
nf2 = nf1; nf2 = nf1;
nh = nf1->n.nh; nhs = nf1->n.nhs;
nhi = nf1->n.ifa;
br_metric = nf1->n.metric1; br_metric = nf1->n.metric1;
} }
else else
@ -1152,12 +1178,13 @@ ospf_ext_spf(struct proto_ospf *po)
continue; continue;
/* Next-hop is a part of a configured stubnet */ /* Next-hop is a part of a configured stubnet */
if (!nf2->n.ifa) if (!nf2->n.nhs)
continue; continue;
/* If nh is zero, it is a device route */ nhs = nf2->n.nhs;
nh = ipa_nonzero(nf2->n.nh) ? nf2->n.nh : rt_fwaddr; /* If gw is zero, it is a device route */
nhi = nf2->n.ifa; if (ipa_zero(nhs->gw))
nhs = new_nexthop(po, rt_fwaddr, nhs->iface, nhs->weight);
br_metric = nf2->n.metric1; br_metric = nf2->n.metric1;
} }
@ -1183,14 +1210,14 @@ ospf_ext_spf(struct proto_ospf *po)
nfa.tag = rt_tag; nfa.tag = rt_tag;
nfa.rid = en->lsa.rt; nfa.rid = en->lsa.rt;
nfa.oa = nf1->n.oa; /* undefined in RFC 2328 */ nfa.oa = nf1->n.oa; /* undefined in RFC 2328 */
nfa.ifa = nhi; nfa.voa = NULL;
nfa.nh = nh; nfa.nhs = nhs;
ri_install_ext(po, ip, pxlen, &nfa); ri_install_ext(po, ip, pxlen, &nfa);
} }
} }
/* Cleanup of routing tables and data Cleanup */ /* Cleanup of routing tables and data */
void void
ospf_rt_reset(struct proto_ospf *po) ospf_rt_reset(struct proto_ospf *po)
{ {
@ -1203,9 +1230,8 @@ ospf_rt_reset(struct proto_ospf *po)
FIB_WALK(&po->rtf, nftmp) FIB_WALK(&po->rtf, nftmp)
{ {
ri = (ort *) nftmp; ri = (ort *) nftmp;
memcpy(&ri->o, &ri->n, sizeof(orta)); /* Backup old data */
ri->fn.x0 = 0; ri->fn.x0 = 0;
reset_ri(&ri->n); reset_ri(ri);
} }
FIB_WALK_END; FIB_WALK_END;
@ -1214,8 +1240,7 @@ ospf_rt_reset(struct proto_ospf *po)
{ {
en->color = OUTSPF; en->color = OUTSPF;
en->dist = LSINFINITY; en->dist = LSINFINITY;
en->nhi = NULL; en->nhs = NULL;
en->nh = IPA_NONE;
en->lb = IPA_NONE; en->lb = IPA_NONE;
} }
@ -1225,8 +1250,7 @@ ospf_rt_reset(struct proto_ospf *po)
FIB_WALK(&oa->rtr, nftmp) FIB_WALK(&oa->rtr, nftmp)
{ {
ri = (ort *) nftmp; ri = (ort *) nftmp;
memcpy(&ri->o, &ri->n, sizeof(orta)); /* Backup old data */ reset_ri(ri);
reset_ri(&ri->n);
} }
FIB_WALK_END; FIB_WALK_END;
@ -1288,15 +1312,220 @@ ospf_rt_spf(struct proto_ospf *po)
ospf_ext_spf(po); ospf_ext_spf(po);
rt_sync(po); rt_sync(po);
lp_flush(po->nhpool);
po->calcrt = 0; po->calcrt = 0;
} }
/* Check whether the network LSA `en` was originated by the designated
   router of interface `ifa`: the DR's router ID must equal the advertising
   router, and the LSA ID must equal the DR's IP address (OSPFv2) or the
   DR's interface ID (OSPFv3). */
static inline int
match_dr(struct ospf_iface *ifa, struct top_hash_entry *en)
{
  if (ifa->drid != en->lsa.rt)
    return 0;

#ifdef OSPFv2
  return ipa_to_u32(ifa->drip) == en->lsa.id;
#else /* OSPFv3 */
  return ifa->dr_iface_id == en->lsa.id;
#endif
}
/* Check whether router-LSA link `rtl` describes interface `ifa`. Only PTP
   interfaces with the same cost as the link metric can match. In OSPFv2 the
   link data must equal the interface index (unnumbered address) or the
   interface IP address; in OSPFv3 rtl->lif must equal the interface index. */
static inline int
match_rtlink(struct ospf_iface *ifa, struct ospf_lsa_rt_link *rtl)
{
  if (ifa->type != OSPF_IT_PTP)
    return 0;

  if (ifa->cost != rtl->metric)
    return 0;

#ifdef OSPFv2
  if (ifa->addr->flags & IA_UNNUMBERED)
    return ifa->iface->index == rtl->data;

  return ipa_to_u32(ifa->addr->ip) == rtl->data;
#else /* OSPFv3 */
  return ifa->iface->index == rtl->lif;
#endif
}
/* Decide whether a child node may simply take over its parent's nexthops. */
static inline int
inherit_nexthops(struct mpnh *pn)
{
  if (!pn)
    return 0;

  /* Either a proper nexthop (gateway defined) or a dummy vlink nexthop
     (no interface); a plain device nexthop is not inherited. */
  return ipa_nonzero(pn->gw) || !pn->iface;
}
/*
 * Compute the nexthops of node `en` when it is reached through parent `par`
 * (RFC 2328 16.1.1).
 *
 * Returns the parent's nexthop list itself when it can be inherited,
 * otherwise a single nexthop freshly allocated by new_nexthop(), or NULL
 * when no nexthop can be determined. `rtl` is the router-LSA link leading
 * to `en`; it is dereferenced only in the second case below (en is a router
 * LSA and par is the root).
 */
static struct mpnh *
calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
struct top_hash_entry *par, struct ospf_lsa_rt_link *rtl)
{
struct proto_ospf *po = oa->po;
struct mpnh *pn = par->nhs;
struct ospf_iface *ifa;
u32 rid = en->lsa.rt;
/* 16.1.1. The next hop calculation */
DBG(" Next hop calculating for id: %R rt: %R type: %u\n",
en->lsa.id, en->lsa.rt, en->lsa.type);
/* Usually, we inherit parent nexthops (returned shared, not copied) */
if (inherit_nexthops(pn))
return pn;
/*
 * There are three cases:
 * 1) en is a local network (and par is root)
 * 2) en is a ptp or ptmp neighbor (and par is root)
 * 3) en is a bcast or nbma neighbor (and par is local network)
 */
/* The first case - local network: device nexthop on the iface whose DR originated en */
if ((en->lsa.type == LSA_T_NET) && (par == oa->rt))
{
WALK_LIST(ifa, po->iface_list)
if (match_dr(ifa, en))
return new_nexthop(po, IPA_NONE, ifa->iface, ifa->ecmp_weight);
return NULL;
}
/* The second case - ptp or ptmp neighbor */
if ((en->lsa.type == LSA_T_RT) && (par == oa->rt))
{
/* Virtual links get a dummy nexthop without gateway and interface */
if (rtl->type == LSART_VLNK)
return new_nexthop(po, IPA_NONE, NULL, 0);
/* Use the first interface matching the link that has a fully adjacent neighbor with en's router ID */
WALK_LIST(ifa, po->iface_list)
if (match_rtlink(ifa, rtl))
{
struct ospf_neighbor *m = find_neigh(ifa, rid);
if (m && (m->state == NEIGHBOR_FULL))
return new_nexthop(po, m->ip, ifa->iface, ifa->ecmp_weight);
}
return NULL;
}
/* The third case - bcast or nbma neighbor */
if ((en->lsa.type == LSA_T_RT) && (par->lsa.type == LSA_T_NET))
{
/* par->nhs should be defined from parent's calc_next_hop() */
if (!pn)
goto bad;
#ifdef OSPFv2
/*
 * In this case, next-hop is the same as link-back, which is
 * already computed in link_back().
 */
if (ipa_zero(en->lb))
goto bad;
return new_nexthop(po, en->lb, pn->iface, pn->weight);
#else /* OSPFv3 */
/*
 * Next-hop is taken from lladdr field of Link-LSA, en->lb_id
 * is computed in link_back(). A missing Link-LSA or a zero lladdr
 * yields NULL without logging an error.
 */
struct top_hash_entry *lhe;
lhe = ospf_hash_find(po->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK);
if (!lhe)
return NULL;
struct ospf_lsa_link *llsa = lhe->lsa_body;
if (ipa_zero(llsa->lladdr))
return NULL;
return new_nexthop(po, llsa->lladdr, pn->iface, pn->weight);
#endif
}
/* Reached by the explicit gotos above and when none of the three cases applies */
bad:
/* Probably bug or some race condition, we log it */
log(L_ERR "Unexpected case in next hop calculation");
return NULL;
}
/* Ordering function used while merging nexthop lists. The lists are kept
   sorted so that duplicates can be detected and eliminated. A NULL argument
   sorts after any non-NULL one. Otherwise nexthops are ordered by descending
   weight, then by gateway address, then by interface index. */
static int
cmp_nhs(struct mpnh *s1, struct mpnh *s2)
{
  if (!s1)
    return 1;

  if (!s2)
    return -1;

  int by_weight = ((int) s2->weight) - ((int) s1->weight);
  if (by_weight != 0)
    return by_weight;

  int by_gw = ipa_compare(s1->gw, s2->gw);
  if (by_gw != 0)
    return by_gw;

  return ((int) s1->iface->index) - ((int) s2->iface->index);
}
/*
 * Merge the sorted nexthop list `new` into the sorted list en->nhs, dropping
 * duplicates and keeping at most po->ecmp entries.
 *
 * own_cur / own_new tell whether nodes of the current list (en->nhs) and of
 * the new list may be linked into the result directly. Nodes of the new list
 * are reusable when the list is not the parent's own list (par->nhs), i.e.
 * it was allocated in calc_next_hop(). Nodes of the current list are
 * reusable when they were not inherited in previous steps; that is recorded
 * in en->nhs_reuse (set after a merge or when allocated in calc_next_hop()).
 * Non-reusable nodes are copied so that shared lists are never modified.
 *
 * Generally, a node first inherits shared nexthops from its parent and
 * later possibly gets a reusable copy during merging.
 */
static void
merge_nexthops(struct proto_ospf *po, struct top_hash_entry *en,
               struct top_hash_entry *par, struct mpnh *new)
{
  /* The node already uses exactly this list, nothing to merge */
  if (en->nhs == new)
    return;

  int own_cur = en->nhs_reuse;
  int own_new = (par->nhs != new);
  struct mpnh *cur = en->nhs;
  struct mpnh *add = new;
  struct mpnh **tail = &(en->nhs);
  int left = po->ecmp;

  while ((cur || add) && left--)
  {
    int order = cmp_nhs(cur, add);
    struct mpnh *pick;

    if (order < 0)
    {
      /* Entry from the current list comes first */
      if (own_cur)
        pick = cur;
      else
        pick = copy_nexthop(po, cur);

      cur = cur->next;
    }
    else if (order > 0)
    {
      /* Entry from the new list comes first */
      if (own_new)
        pick = add;
      else
        pick = copy_nexthop(po, add);

      add = add->next;
    }
    else
    {
      /* Same nexthop in both lists, keep just one instance */
      if (own_cur)
        pick = cur;
      else if (own_new)
        pick = add;
      else
        pick = copy_nexthop(po, cur);

      cur = cur->next;
      add = add->next;
    }

    *tail = pick;
    tail = &(pick->next);
  }

  /* Terminate the result; anything beyond the ECMP limit is cut off */
  *tail = NULL;
  en->nhs_reuse=1;
}
/* Add LSA into list of candidates in Dijkstra's algorithm */ /* Add LSA into list of candidates in Dijkstra's algorithm */
static void static void
add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
u32 dist, struct ospf_area *oa) u32 dist, struct ospf_area *oa, struct ospf_lsa_rt_link *rtl)
{ {
struct proto_ospf *po = oa->po;
node *prev, *n; node *prev, *n;
int added = 0; int added = 0;
struct top_hash_entry *act; struct top_hash_entry *act;
@ -1321,24 +1550,48 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
return; return;
/* 16.1. (2d), also checks that dist < LSINFINITY */ /* 16.1. (2d), also checks that dist < LSINFINITY */
if (dist >= en->dist) if (dist > en->dist)
return; return;
/*
* The line above (=) is not a bug, but we don't support multiple
* next hops. I'll start as soon as nest will
*/
/* We should check whether there is a reverse link from en to par, */ /* We should check whether there is a reverse link from en to par, */
if (!link_back(oa, en, par)) if (!link_back(oa, en, par))
return; return;
if (!calc_next_hop(oa, en, par)) struct mpnh *nhs = calc_next_hop(oa, en, par, rtl);
if (!nhs)
{ {
log(L_WARN "Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", log(L_WARN "Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)",
en->lsa.type, en->lsa.id, en->lsa.rt); en->lsa.type, en->lsa.id, en->lsa.rt);
return; return;
} }
if (dist == en->dist)
{
/*
* For multipath, we should merge nexthops. We do not mix dummy
* vlink nexthops, device nexthops and gateway nexthops. We merge
* gateway nexthops only. We prefer device nexthops over gateway
* nexthops and gateway nexthops over vlink nexthops. We either
* keep old nexthops, merge old and new, or replace old with new.
*
* We know that en->color == CANDIDATE and en->nhs is defined.
*/
struct mpnh *onhs = en->nhs;
/* Keep old ones */
if (!po->ecmp || !nhs->iface || (onhs->iface && ipa_zero(onhs->gw)))
return;
/* Merge old and new */
if (ipa_nonzero(nhs->gw) && ipa_nonzero(onhs->gw))
{
merge_nexthops(po, en, par, nhs);
return;
}
/* Fallback to replace old ones */
}
DBG(" Adding candidate: rt: %R, id: %R, type: %u\n", DBG(" Adding candidate: rt: %R, id: %R, type: %u\n",
en->lsa.rt, en->lsa.id, en->lsa.type); en->lsa.rt, en->lsa.id, en->lsa.type);
@ -1346,8 +1599,10 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
{ /* We found a shorter path */ { /* We found a shorter path */
rem_node(&en->cn); rem_node(&en->cn);
} }
en->nhs = nhs;
en->dist = dist; en->dist = dist;
en->color = CANDIDATE; en->color = CANDIDATE;
en->nhs_reuse = (par->nhs != nhs);
prev = NULL; prev = NULL;
@ -1361,8 +1616,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
{ {
act = SKIP_BACK(struct top_hash_entry, cn, n); act = SKIP_BACK(struct top_hash_entry, cn, n);
if ((act->dist > dist) || if ((act->dist > dist) ||
((act->dist == dist) && (act->lsa.type == LSA_T_NET))) ((act->dist == dist) && (act->lsa.type == LSA_T_RT)))
/* FIXME - shouldn't be here LSA_T_RT ??? */
{ {
if (prev == NULL) if (prev == NULL)
add_head(l, &en->cn); add_head(l, &en->cn);
@ -1381,132 +1635,16 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
} }
} }
static inline int static inline int
match_dr(struct ospf_iface *ifa, struct top_hash_entry *en) ort_changed(ort *nf, rta *nr)
{ {
#ifdef OSPFv2 rta *or = nf->old_rta;
return (ifa->drid == en->lsa.rt) && (ipa_to_u32(ifa->drip) == en->lsa.id); return !or ||
#else /* OSPFv3 */ (nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) ||
return (ifa->drid == en->lsa.rt) && (ifa->dr_iface_id == en->lsa.id); (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) ||
#endif (nr->source != or->source) || (nr->dest != or->dest) ||
} (nr->iface != or->iface) || !ipa_equal(nr->gw, or->gw) ||
!mpnh_same(nr->nexthops, or->nexthops);
static int
calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
struct top_hash_entry *par)
{
// struct proto *p = &oa->po->proto;
struct ospf_neighbor *neigh, *m;
struct proto_ospf *po = oa->po;
struct ospf_iface *ifa;
/* 16.1.1. The next hop calculation */
DBG(" Next hop called.\n");
if (ipa_zero(par->nh))
{
u32 rid = en->lsa.rt;
DBG(" Next hop calculating for id: %R rt: %R type: %u\n",
en->lsa.id, en->lsa.rt, en->lsa.type);
/*
* There are three cases:
* 1) en is a local network (and par is root)
* 2) en is a ptp or ptmp neighbor (and par is root)
* 3) en is a bcast or nbma neighbor (and par is local network)
*/
/* The first case - local network */
if ((en->lsa.type == LSA_T_NET) && (par == oa->rt))
{
WALK_LIST(ifa, po->iface_list)
if (match_dr(ifa, en))
{
en->nh = IPA_NONE;
en->nhi = ifa;
return 1;
}
return 0;
}
/* The second case - ptp or ptmp neighbor */
if ((en->lsa.type == LSA_T_RT) && (par == oa->rt))
{
/*
* We don't know which iface was used to reach this neighbor
* (there might be more parallel ifaces) so we will find
* the best PTP iface with given fully adjacent neighbor.
*/
neigh = NULL;
WALK_LIST(ifa, po->iface_list)
if ((ifa->type == OSPF_IT_PTP) || (ifa->type == OSPF_IT_VLINK))
{
m = find_neigh(ifa, rid);
if (m && (m->state == NEIGHBOR_FULL))
{
if (!neigh || (m->ifa->cost < neigh->ifa->cost))
neigh = m;
}
}
if (!neigh)
return 0;
en->nh = neigh->ip;
en->nhi = neigh->ifa;
return 1;
}
/* The third case - bcast or nbma neighbor */
if ((en->lsa.type == LSA_T_RT) && (par->lsa.type == LSA_T_NET))
{
/* par->nhi should be defined from parent's calc_next_hop() */
if (!par->nhi)
goto bad;
#ifdef OSPFv2
/*
* In this case, next-hop is the same as link-back, which is
* already computed in link_back().
*/
if (ipa_zero(en->lb))
goto bad;
en->nh = en->lb;
en->nhi = par->nhi;
return 1;
#else /* OSPFv3 */
/*
* Next-hop is taken from lladdr field of Link-LSA, en->lb_id
* is computed in link_back().
*/
struct top_hash_entry *lhe;
lhe = ospf_hash_find(po->gr, par->nhi->iface->index, en->lb_id, rid, LSA_T_LINK);
if (!lhe)
return 0;
struct ospf_lsa_link *llsa = lhe->lsa_body;
if (ipa_zero(llsa->lladdr))
return 0;
en->nh = llsa->lladdr;
en->nhi = par->nhi;
return 1;
#endif
}
bad:
/* Probably bug or some race condition, we log it */
log(L_ERR "Unexpected case in next hop calculation");
return 0;
}
en->nh = par->nh;
en->nhi = par->nhi;
return 1;
} }
static void static void
@ -1530,24 +1668,25 @@ again1:
{ {
nf = (ort *) nftmp; nf = (ort *) nftmp;
/* Sanity check of next-hop address */ /* Sanity check of next-hop addresses, failure should not happen */
if (nf->n.type && ipa_nonzero(nf->n.nh)) if (nf->n.type)
{ {
neighbor *ng = neigh_find2(p, &nf->n.nh, nf->n.ifa->iface, 0); struct mpnh *nh;
for (nh = nf->n.nhs; nh; nh = nh->next)
if (ipa_nonzero(nh->gw))
{
neighbor *ng = neigh_find2(p, &nh->gw, nh->iface, 0);
if (!ng || (ng->scope == SCOPE_HOST)) if (!ng || (ng->scope == SCOPE_HOST))
reset_ri(&nf->n); { reset_ri(nf); break; }
}
} }
if (po->areano > 1) if (po->areano > 1)
check_sum_net_lsa(po, nf); check_sum_net_lsa(po, nf);
/* Remove configured stubnets */ /* Remove configured stubnets */
if (!nf->n.ifa) if (!nf->n.nhs)
reset_ri(&nf->n); reset_ri(nf);
if (reload || memcmp(&nf->n, &nf->o, sizeof(orta)))
{
net *ne = net_get(p->table, nf->fn.prefix, nf->fn.pxlen);
if (nf->n.type) /* Add the route */ if (nf->n.type) /* Add the route */
{ {
@ -1556,30 +1695,55 @@ again1:
.source = nf->n.type, .source = nf->n.type,
.scope = SCOPE_UNIVERSE, .scope = SCOPE_UNIVERSE,
.cast = RTC_UNICAST, .cast = RTC_UNICAST,
.iface = nf->n.ifa->iface
}; };
if (ipa_nonzero(nf->n.nh)) if (nf->n.nhs->next)
{
a0.dest = RTD_MULTIPATH;
a0.nexthops = nf->n.nhs;
}
else if (ipa_nonzero(nf->n.nhs->gw))
{ {
a0.dest = RTD_ROUTER; a0.dest = RTD_ROUTER;
a0.gw = nf->n.nh; a0.iface = nf->n.nhs->iface;
a0.gw = nf->n.nhs->gw;
} }
else else
{
a0.dest = RTD_DEVICE; a0.dest = RTD_DEVICE;
a0.iface = nf->n.nhs->iface;
}
rte *e = rte_get_temp(&a0); if (reload || ort_changed(nf, &a0))
e->u.ospf.metric1 = nf->n.metric1; {
e->u.ospf.metric2 = nf->n.metric2; net *ne = net_get(p->table, nf->fn.prefix, nf->fn.pxlen);
e->u.ospf.tag = nf->n.tag; rta *a = rta_lookup(&a0);
e->u.ospf.router_id = nf->n.rid; rte *e = rte_get_temp(a);
rta_free(nf->old_rta);
nf->old_rta = rta_clone(a);
e->u.ospf.metric1 = nf->old_metric1 = nf->n.metric1;
e->u.ospf.metric2 = nf->old_metric2 = nf->n.metric2;
e->u.ospf.tag = nf->old_tag = nf->n.tag;
e->u.ospf.router_id = nf->old_rid = nf->n.rid;
e->pflags = 0; e->pflags = 0;
e->net = ne; e->net = ne;
e->pref = p->preference; e->pref = p->preference;
DBG("Mod rte type %d - %I/%d via %I on iface %s, met %d\n", DBG("Mod rte type %d - %I/%d via %I on iface %s, met %d\n",
a0.source, nf->fn.prefix, nf->fn.pxlen, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1); a0.source, nf->fn.prefix, nf->fn.pxlen, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1);
rte_update(p->table, ne, p, p, e); rte_update(p->table, ne, p, p, e);
} }
else /* Remove the route */ }
else if (nf->old_rta)
{
/* Remove the route */
rta_free(nf->old_rta);
nf->old_rta = NULL;
net *ne = net_get(p->table, nf->fn.prefix, nf->fn.pxlen);
rte_update(p->table, ne, p, p, NULL); rte_update(p->table, ne, p, p, NULL);
} }

View file

@ -40,21 +40,32 @@ typedef struct orta
u32 tag; u32 tag;
u32 rid; /* Router ID of real advertising router */ u32 rid; /* Router ID of real advertising router */
struct ospf_area *oa; struct ospf_area *oa;
struct ospf_iface *ifa; /* Outgoing interface */ struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(),
ip_addr nh; /* Next hop */ NULL otherwise */
struct mpnh *nhs; /* Next hops computed during SPF */
} }
orta; orta;
// struct ospf_iface *ifa; /* Outgoing interface */
// ip_addr nh; /* Next hop */
typedef struct ort typedef struct ort
{ {
/* /*
* We use fn.x0 to mark persistent rt entries, that are needed for summary * We use fn.x0 to mark persistent rt entries, that are needed for summary
* LSAs that don't have 'proper' rt entry (area networks + default to stubs) * LSAs that don't have 'proper' rt entry (area networks + default to stubs)
* to keep uid stable (used for LSA ID in OSPFv3 - see fibnode_to_lsaid()). * to keep uid stable (used for LSA ID in OSPFv3 - see fibnode_to_lsaid()).
*
* old_* values are here to represent the last route update. old_rta
* is cached (we keep reference), mainly for multipath nexthops.
* old_rta == NULL means route was not in the last update, in that
* case other old_* values are not valid.
*/ */
struct fib_node fn; struct fib_node fn;
orta n; orta n;
orta o; u32 old_metric1, old_metric2, old_tag, old_rid;
rta *old_rta;
} }
ort; ort;
@ -64,18 +75,24 @@ ort;
* - only router, network and AS-external LSAs * - only router, network and AS-external LSAs
* - lsa.age < LSA_MAXAGE * - lsa.age < LSA_MAXAGE
* - dist < LSINFINITY (or 2*LSINFINITY for ext-LSAs) * - dist < LSINFINITY (or 2*LSINFINITY for ext-LSAs)
* - nhi are non-NULL unless the node is oa->rt (calculating router itself) * - nhs is non-NULL unless the node is oa->rt (calculating router itself)
* - beware, nhi is not valid after SPF calculation * - beware, nhs is not valid after SPF calculation
* - nh is IFA_NONE iff the node is a local network
* *
* Invariants for structs orta nodes of fib tables po->rtf, oa->rtr: * Invariants for structs orta nodes of fib tables po->rtf, oa->rtr:
* - nodes may be invalid (fn.type == 0), in that case other invariants don't hold * - nodes may be invalid (fn.type == 0), in that case other invariants don't hold
* - n.metric1 may be at most a small multiple of LSINFINITY, * - n.metric1 may be at most a small multiple of LSINFINITY,
* therefore sums do not overflow * therefore sums do not overflow
* - n.oa is always non-NULL * - n.oa is always non-NULL
* - n.ifa is always non-NULL with one exception - configured stubnet * - n.nhs is always non-NULL with one exception - configured stubnet
nodes (in po->rtf). In that case, n.nh is IFA_NONE. * nodes (in po->rtf).
* - oa->rtr does not contain calculating router itself * - oa->rtr does not contain calculating router itself
*
* There are three types of nexthops in nhs fields:
* - gateway nexthops (non-NULL iface, gw != IPA_NONE)
* - device nexthops (non-NULL iface, gw == IPA_NONE)
* - dummy vlink nexthops (NULL iface, gw == IPA_NONE)
* These three types don't mix, nhs field contains either
* one device, one vlink node, or one/more gateway nodes.
*/ */
void ospf_rt_spf(struct proto_ospf *po); void ospf_rt_spf(struct proto_ospf *po);

View file

@ -1674,14 +1674,12 @@ ospf_hash_get(struct top_graph *f, u32 domain, u32 lsa, u32 rtr, u32 type)
e = sl_alloc(f->hash_slab); e = sl_alloc(f->hash_slab);
e->color = OUTSPF; e->color = OUTSPF;
e->dist = LSINFINITY; e->dist = LSINFINITY;
e->nhi = NULL; e->nhs = NULL;
e->nh = IPA_NONE;
e->lb = IPA_NONE; e->lb = IPA_NONE;
e->lsa.id = lsa; e->lsa.id = lsa;
e->lsa.rt = rtr; e->lsa.rt = rtr;
e->lsa.type = type; e->lsa.type = type;
e->lsa_body = NULL; e->lsa_body = NULL;
e->nhi = NULL;
e->domain = domain; e->domain = domain;
e->next = *ee; e->next = *ee;
*ee = e; *ee = e;

View file

@ -20,9 +20,8 @@ struct top_hash_entry
// struct ospf_area *oa; // struct ospf_area *oa;
void *lsa_body; void *lsa_body;
bird_clock_t inst_t; /* Time of installation into DB */ bird_clock_t inst_t; /* Time of installation into DB */
ip_addr nh; /* Next hop */ struct mpnh *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */
ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */ ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */
struct ospf_iface *nhi; /* Next hop interface - valid only in ospf_rt_spf()*/
#ifdef OSPFv3 #ifdef OSPFv3
u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */
#endif #endif
@ -32,7 +31,8 @@ struct top_hash_entry
#define OUTSPF 0 #define OUTSPF 0
#define CANDIDATE 1 #define CANDIDATE 1
#define INSPF 2 #define INSPF 2
u8 padding; u8 nhs_reuse; /* Whether nhs nodes can be reused during merging.
See a note in rt.c:merge_nexthops() */
}; };
struct top_graph struct top_graph