From cfe34a316e35a209fcd814ccf3523c262e8d4b0a Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Mon, 5 Jul 2010 17:50:19 +0200 Subject: [PATCH] Implements hostcache and recursive next hops. Hostcache is a structure for monitoring changes in a routing table that is used for routes with dynamic/recursive next hops. This is needed for proper iBGP next hop handling. --- nest/proto.c | 6 +- nest/protocol.h | 1 + nest/route.h | 45 +++- nest/rt-attr.c | 2 + nest/rt-table.c | 416 +++++++++++++++++++++++++++++++-- proto/bgp/attrs.c | 6 +- proto/bgp/bgp.c | 20 +- proto/bgp/bgp.h | 2 + proto/bgp/config.Y | 3 +- proto/bgp/packets.c | 32 +-- proto/pipe/pipe.c | 11 +- sysdep/bsd/krt-sock.c | 18 +- sysdep/linux/krt-scan.c | 15 +- sysdep/linux/netlink/netlink.c | 17 +- 14 files changed, 512 insertions(+), 82 deletions(-) diff --git a/nest/proto.c b/nest/proto.c index c9e2f5c7..16ec3f9b 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -577,10 +577,8 @@ proto_fell_down(struct proto *p) bzero(&p->stats, sizeof(struct proto_stats)); rt_unlock_table(p->table); -#ifdef CONFIG_PIPE - if (proto_is_pipe(p)) - rt_unlock_table(pipe_get_peer_table(p)); -#endif + if (p->proto->cleanup) + p->proto->cleanup(p); proto_rethink_goal(p); } diff --git a/nest/protocol.h b/nest/protocol.h index 5dac2a9b..70999f0e 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -48,6 +48,7 @@ struct protocol { void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */ int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ + void (*cleanup)(struct proto *); /* Called after shutdown when protocol became hungry/down */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); /* Get route information (for `show route' command) */ int (*get_attr)(struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) 
*/ diff --git a/nest/route.h b/nest/route.h index 99803e7c..e599f5b9 100644 --- a/nest/route.h +++ b/nest/route.h @@ -129,14 +129,19 @@ typedef struct rtable { list hooks; /* List of announcement hooks */ int pipe_busy; /* Pipe loop detection */ int use_count; /* Number of protocols using this table */ + struct hostcache *hostcache; struct rtable_config *config; /* Configuration of this table */ struct config *deleted; /* Table doesn't exist in current configuration, * delete as soon as use_count becomes 0 and remove * obstacle from this routing table. */ - struct event *gc_event; /* Garbage collector event */ + struct event *rt_event; /* Routing table event */ int gc_counter; /* Number of operations since last GC */ bird_clock_t gc_time; /* Time of last GC */ + byte gc_scheduled; /* GC is scheduled */ + byte hcu_scheduled; /* Hostcache update is scheduled */ + byte nhu_state; /* Next Hop Update state */ + struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ } rtable; typedef struct network { @@ -144,6 +149,23 @@ typedef struct network { struct rte *routes; /* Available routes for this network */ } net; +struct hostcache { + struct fib htable; + list hostentries; + byte update_hostcache; +}; + +struct hostentry { + struct fib_node fn; + node ln; + unsigned uc; /* Use count */ + struct iface *iface; /* Chosen outgoing interface */ + ip_addr gw; /* Chosen next hop */ + byte dest; /* Chosen route destination type (RTD_...) 
*/ + byte pxlen; /* Pxlen from net that matches route */ + struct rtable *tab; +}; + typedef struct rte { struct rte *next; net *net; /* Network this RTE belongs to */ @@ -207,7 +229,6 @@ void rt_dump(rtable *); void rt_dump_all(void); int rt_feed_baby(struct proto *p); void rt_feed_baby_abort(struct proto *p); -void rt_prune(rtable *tab); void rt_prune_all(void); struct rtable_config *rt_new_table(struct symbol *s); @@ -248,6 +269,7 @@ typedef struct rta { u16 hash_key; /* Hash over important fields */ ip_addr gw; /* Next hop */ ip_addr from; /* Advertising router */ + struct hostentry *hostentry; /* Hostentry for recursive next-hops */ struct iface *iface; /* Outgoing interface */ struct ea_list *eattrs; /* Extended Attribute chain */ } rta; @@ -357,6 +379,25 @@ static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); +void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw); + +/* + * rta_set_recursive_next_hop() acquires hostentry from hostcache and + * fills rta->hostentry field. New hostentry has zero use + * count. Cached rta locks its hostentry (increases its use count), + * uncached rta does not lock it. Hostentry with zero use count is + * removed asynchronously during host cache update, therefore it is + * safe to hold such hostentry temporarily. There is no need to hold + * a lock for hostentry->tab table, because that table contains routes + * responsible for that hostentry, and therefore is non-empty if given + * hostentry has non-zero use count. The protocol responsible for routes + * with recursive next hops should also hold a lock for a table governing + * those routes (argument tab to rta_set_recursive_next_hop()). 
+ */ + +static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; } +static inline void rt_unlock_hostentry(struct hostentry *he) { if (he) he->uc--; } + extern struct protocol *attr_class_to_protocol[EAP_MAX]; diff --git a/nest/rt-attr.c b/nest/rt-attr.c index abd49c70..9caee8d5 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -671,6 +671,7 @@ rta_lookup(rta *o) r = rta_copy(o); r->hash_key = h; r->aflags = RTAF_CACHED; + rt_lock_hostentry(r->hostentry); rta_insert(r); if (++rta_cache_count > rta_cache_limit) @@ -688,6 +689,7 @@ rta__free(rta *a) if (a->next) a->next->pprev = a->pprev; a->aflags = 0; /* Poison the entry */ + rt_unlock_hostentry(a->hostentry); ea_free(a->eattrs); sl_free(rta_slab, a); } diff --git a/nest/rt-table.c b/nest/rt-table.c index 8cca42a7..b5256945 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -51,6 +51,13 @@ static linpool *rte_update_pool; static list routing_tables; static void rt_format_via(rte *e, byte *via); +static void rt_free_hostcache(rtable *tab); +static void rt_notify_hostcache(rtable *tab, net *net); +static void rt_update_hostcache(rtable *tab); +static void rt_next_hop_update(rtable *tab); +static void rt_prune(rtable *tab); + +static inline void rt_schedule_gc(rtable *tab); static void rte_init(struct fib_node *N) @@ -210,7 +217,7 @@ do_rte_announce(struct announce_hook *a, int type UNUSED, net *net, rte *new, rt * This is a tricky part - we don't know whether route 'old' was * exported to protocol 'p' or was filtered by the export filter. * We try tu run the export filter to know this to have a correct - * value in 'old' argument of rt_update (and proper filter value) + * value in 'old' argument of rte_update (and proper filter value) * * FIXME - this is broken because 'configure soft' may change * filters but keep routes. 
Refeed is expected to be called after @@ -327,6 +334,9 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, ea_list * new->attrs->proto->stats.pref_routes++; if (old) old->attrs->proto->stats.pref_routes--; + + if (tab->hostcache) + rt_notify_hostcache(tab, net); } WALK_LIST(a, tab->hooks) @@ -337,6 +347,7 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, ea_list * } } + static inline int rte_validate(rte *e) { @@ -469,7 +480,6 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte rte_announce(table, RA_ANY, net, new, old, tmpa); - if (new && rte_better(new, old_best)) { /* The first case - the new route is cleary optimal, we link it @@ -523,7 +533,7 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte } else if (table->gc_counter++ >= table->config->gc_max_ops && table->gc_time + table->config->gc_min_time <= now) - ev_schedule(table->gc_event); + rt_schedule_gc(table); } else if (new) { @@ -688,6 +698,21 @@ drop: rte_update_unlock(); } +/* Independent call to rte_announce(), used from next hop + recalculation, outside of rte_update(). new must be non-NULL */ +static inline void +rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old) +{ + struct proto *src; + ea_list *tmpa; + + rte_update_lock(); + src = new->attrs->proto; + tmpa = src->make_tmp_attrs ? 
src->make_tmp_attrs(new, rte_update_pool) : NULL; + rte_announce(tab, type, n, new, old, tmpa); + rte_update_unlock(); +} + void rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during garbage collection */ { @@ -760,14 +785,49 @@ rt_dump_all(void) rt_dump(t); } -static void -rt_gc(void *tab) +static inline void +rt_schedule_gc(rtable *tab) { - rtable *t = tab; + if (tab->gc_scheduled) + return; - DBG("Entered routing table garbage collector for %s after %d seconds and %d deletes\n", - t->name, (int)(now - t->gc_time), t->gc_counter); - rt_prune(t); + tab->gc_scheduled = 1; + ev_schedule(tab->rt_event); +} + +static inline void +rt_schedule_hcu(rtable *tab) +{ + if (tab->hcu_scheduled) + return; + + tab->hcu_scheduled = 1; + ev_schedule(tab->rt_event); +} + +static inline void +rt_schedule_nhu(rtable *tab) +{ + if (tab->nhu_state == 0) + ev_schedule(tab->rt_event); + + /* state change 0->1, 2->3 */ + tab->nhu_state |= 1; +} + +static void +rt_event(void *ptr) +{ + rtable *tab = ptr; + + if (tab->hcu_scheduled) + rt_update_hostcache(tab); + + if (tab->nhu_state) + rt_next_hop_update(tab); + + if (tab->gc_scheduled) + rt_prune(tab); } void @@ -780,9 +840,9 @@ rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf) init_list(&t->hooks); if (cf) { - t->gc_event = ev_new(p); - t->gc_event->hook = rt_gc; - t->gc_event->data = t; + t->rt_event = ev_new(p); + t->rt_event->hook = rt_event; + t->rt_event->data = t; t->gc_time = now; } } @@ -811,7 +871,7 @@ rt_init(void) * the routing table and removes all routes belonging to inactive * protocols and also stale network entries. 
*/ -void +static void rt_prune(rtable *tab) { struct fib_iterator fit; @@ -852,6 +912,7 @@ again: #endif tab->gc_counter = 0; tab->gc_time = now; + tab->gc_scheduled = 0; } /** @@ -868,6 +929,151 @@ rt_prune_all(void) rt_prune(t); } +void +rt_preconfig(struct config *c) +{ + struct symbol *s = cf_find_symbol("master"); + + init_list(&c->tables); + c->master_rtc = rt_new_table(s); +} + + +/* + * Some functions for handling internal next hop updates + * triggered by rt_schedule_nhu(). + */ + +static inline int +hostentry_diff(struct hostentry *he, struct iface *iface, ip_addr gw, byte dest) +{ + return (he->iface != iface) || !ipa_equal(he->gw, gw) || (he->dest != dest); +} + +static inline int +rta_next_hop_outdated(rta *a) +{ + struct hostentry *he = a->hostentry; + return he && hostentry_diff(he, a->iface, a->gw, a->dest); +} + +static inline void +rta_apply_hostentry(rta *a, struct hostentry *he) +{ + a->hostentry = he; + a->iface = he->iface; + a->gw = he->gw; + a->dest = he->dest; +} + +static inline rte * +rt_next_hop_update_rte(rtable *tab, rte *old) +{ + rta a; + memcpy(&a, old->attrs, sizeof(rta)); + rta_apply_hostentry(&a, old->attrs->hostentry); + a.aflags = 0; + + rte *e = sl_alloc(rte_slab); + memcpy(e, old, sizeof(rte)); + e->attrs = rta_lookup(&a); + + return e; +} + +static inline int +rt_next_hop_update_net(rtable *tab, net *n) +{ + rte **k, *e, *new, *old_best, **new_best; + int count = 0; + int free_old_best = 0; + + old_best = n->routes; + if (!old_best) + return 0; + + new_best = NULL; + + for (k = &n->routes; e = *k; k = &e->next) + { + if (rta_next_hop_outdated(e->attrs)) + { + new = rt_next_hop_update_rte(tab, e); + *k = new; + + rte_announce_i(tab, RA_ANY, n, new, e); + rte_trace_in(D_ROUTES, new->sender, new, "updated"); + + if (e != old_best) + rte_free_quick(e); + else /* Freeing of the old best rte is postponed */ + free_old_best = 1; + + e = new; + count++; + } + + if (!new_best || rte_better(e, *new_best)) + new_best = k; + } + + /* 
Relink the new best route to the first position */ + new = *new_best; + if (new != n->routes) + { + *new_best = new->next; + new->next = n->routes; + n->routes = new; + } + + /* Announce the new best route */ + if (new != old_best) + { + rte_announce_i(tab, RA_OPTIMAL, n, new, old_best); + rte_trace_in(D_ROUTES, new->sender, new, "updated [best]"); + } + + if (free_old_best) + rte_free_quick(old_best); + + return count; +} + +static void +rt_next_hop_update(rtable *tab) +{ + struct fib_iterator *fit = &tab->nhu_fit; + int max_feed = 32; + + if (tab->nhu_state == 0) + return; + + if (tab->nhu_state == 1) + { + FIB_ITERATE_INIT(fit, &tab->fib); + tab->nhu_state = 2; + } + + FIB_ITERATE_START(&tab->fib, fit, fn) + { + if (max_feed <= 0) + { + FIB_ITERATE_PUT(fit, fn); + ev_schedule(tab->rt_event); + return; + } + max_feed -= rt_next_hop_update_net(tab, (net *) fn); + } + FIB_ITERATE_END(fn); + + /* state change 2->0, 3->1 */ + tab->nhu_state &= 1; + + if (tab->nhu_state > 0) + ev_schedule(tab->rt_event); +} + + struct rtable_config * rt_new_table(struct symbol *s) { @@ -881,15 +1087,6 @@ rt_new_table(struct symbol *s) return c; } -void -rt_preconfig(struct config *c) -{ - struct symbol *s = cf_find_symbol("master"); - - init_list(&c->tables); - c->master_rtc = rt_new_table(s); -} - /** * rt_lock_table - lock a routing table * @r: routing table to be locked @@ -919,8 +1116,11 @@ rt_unlock_table(rtable *r) { struct config *conf = r->deleted; DBG("Deleting routing table %s\n", r->name); + if (r->hostcache) + rt_free_hostcache(r); rem_node(&r->n); fib_free(&r->fib); + rfree(r->rt_event); mb_free(r); config_del_obstacle(conf); } @@ -1087,6 +1287,178 @@ rt_feed_baby_abort(struct proto *p) } } +static void +hostentry_init(struct fib_node *fn) +{ + ((struct hostentry *) fn)->uc = 0; + ((struct hostentry *) fn)->tab = NULL; +} + +static void +rt_init_hostcache(rtable *tab) +{ + struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache)); + 
init_list(&hc->hostentries); + fib_init(&hc->htable, rt_table_pool, sizeof(struct hostentry), 0, hostentry_init); + tab->hostcache = hc; +} + +static void +rt_free_hostcache(rtable *tab) +{ + struct hostcache *hc = tab->hostcache; + + node *n; + WALK_LIST(n, hc->hostentries) + { + struct hostentry *he = SKIP_BACK(struct hostentry, ln, n); + if (he->uc) + log(L_ERR "Hostcache is not empty in table %s", tab->name); + } + + fib_free(&hc->htable); + mb_free(hc); +} + +static void +rt_notify_hostcache(rtable *tab, net *net) +{ + struct hostcache *hc = tab->hostcache; + + if (tab->hcu_scheduled) + return; + + node *n; + WALK_LIST(n, hc->hostentries) + { + struct hostentry *he = SKIP_BACK(struct hostentry, ln, n); + if (ipa_in_net(he->fn.prefix, net->n.prefix, net->n.pxlen) && + (he->pxlen <= net->n.pxlen)) + { + rt_schedule_hcu(tab); + return; + } + } +} + +static int +if_local_addr(ip_addr a, struct iface *i) +{ + struct ifa *b; + + WALK_LIST(b, i->addrs) + if (ipa_equal(a, b->ip)) + return 1; + + return 0; +} + +static int +rt_update_hostentry(rtable *tab, struct hostentry *he) +{ + struct iface *old_iface = he->iface; + ip_addr old_gw = he->gw; + byte old_dest = he->dest; + + net *n = fib_route(&tab->fib, he->fn.prefix, MAX_PREFIX_LENGTH); + if (n && n->routes) + { + rta *a = n->routes->attrs; + + if (a->dest == RTD_DEVICE) + { + if (if_local_addr(he->fn.prefix, a->iface)) + { + /* The host address is a local address, this is not valid */ + log(L_WARN "Next hop address %I is a local address of iface %s", + he->fn.prefix, a->iface->name); + he->iface = NULL; + he->gw = IPA_NONE; + he->dest = RTD_UNREACHABLE; + } + else + { + /* The host is directly reachable, use it as a gateway */ + he->iface = a->iface; + he->gw = he->fn.prefix; + he->dest = RTD_ROUTER; + } + } + else + { + /* The host is reachable through some route entry */ + he->iface = a->iface; + he->gw = a->gw; + he->dest = a->dest; + } + + he->pxlen = n->n.pxlen; + } + else + { + /* The host is unreachable */ + 
he->iface = NULL; + he->gw = IPA_NONE; + he->dest = RTD_UNREACHABLE; + + he->pxlen = 0; + } + + return hostentry_diff(he, old_iface, old_gw, old_dest); +} + +static void +rt_update_hostcache(rtable *tab) +{ + struct hostcache *hc = tab->hostcache; + struct hostentry *he; + node *n, *x; + + WALK_LIST_DELSAFE(n, x, hc->hostentries) + { + he = SKIP_BACK(struct hostentry, ln, n); + if (!he->uc) + { + /* Delete a hostentry */ + rem_node(&he->ln); + fib_delete(&hc->htable, he); + continue; + } + + if (rt_update_hostentry(tab, he)) + rt_schedule_nhu(he->tab); + } + + tab->hcu_scheduled = 0; +} + +static struct hostentry * +rt_find_hostentry(rtable *tab, ip_addr *a, rtable *dep) +{ + struct hostentry *he; + + if (!tab->hostcache) + rt_init_hostcache(tab); + + he = fib_get(&tab->hostcache->htable, a, MAX_PREFIX_LENGTH); + if (!he->tab) + { + /* New entry */ + add_tail(&tab->hostcache->hostentries, &he->ln); + he->tab = dep; + + rt_update_hostentry(tab, he); + } + + return he; +} + +void +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw) +{ + rta_apply_hostentry(a, rt_find_hostentry(tab, gw, dep)); +} + /* * CLI commands */ diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index c1b8fd1e..8743358e 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1265,15 +1265,13 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin ea_list *ea; struct adata *ad; + bzero(a, sizeof(rta)); a->proto = &bgp->p; a->source = RTS_BGP; a->scope = SCOPE_UNIVERSE; a->cast = RTC_UNICAST; - a->dest = RTD_ROUTER; - a->flags = 0; - a->aflags = 0; + /* a->dest = RTD_ROUTER; -- set in bgp_set_next_hop() */ a->from = bgp->cf->remote_ip; - a->eattrs = NULL; /* Parse the attributes */ bzero(seen, sizeof(seen)); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 932c8aa3..b36c4a3d 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -786,6 +786,8 @@ bgp_start(struct proto *P) p->incoming_conn.state = BS_IDLE; p->neigh = NULL; + 
rt_lock_table(p->igp_table); + p->event = ev_new(p->p.pool); p->event->hook = bgp_decision; p->event->data = p; @@ -837,6 +839,19 @@ bgp_shutdown(struct proto *P) return p->p.proto_state; } +static void +bgp_cleanup(struct proto *P) +{ + struct bgp_proto *p = (struct bgp_proto *) P; + rt_unlock_table(p->igp_table); +} + +static rtable * +get_igp_table(struct bgp_config *cf) +{ + return cf->igp_table ? cf->igp_table->table : cf->c.table->table; +} + static struct proto * bgp_init(struct proto_config *C) { @@ -854,6 +869,7 @@ bgp_init(struct proto_config *C) p->local_as = c->local_as; p->remote_as = c->remote_as; p->is_internal = (c->local_as == c->remote_as); + p->igp_table = get_igp_table(c); return P; } @@ -1065,7 +1081,8 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) // password item is last and must be checked separately OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config)) && ((!old->password && !new->password) - || (old->password && new->password && !strcmp(old->password, new->password))); + || (old->password && new->password && !strcmp(old->password, new->password))) + && (get_igp_table(old) == get_igp_table(new)); /* We should update our copy of configuration ptr as old configuration will be freed */ if (same) @@ -1081,6 +1098,7 @@ struct protocol proto_bgp = { init: bgp_init, start: bgp_start, shutdown: bgp_shutdown, + cleanup: bgp_cleanup, reconfigure: bgp_reconfigure, get_status: bgp_get_status, get_attr: bgp_get_attr, diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index c81fe624..76844af3 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -47,6 +47,7 @@ struct bgp_config { unsigned error_delay_time_max; unsigned disable_after_error; /* Disable the protocol when error is detected */ char *password; /* Password used for MD5 authentication */ + struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ }; #define MLL_SELF 1 @@ -92,6 +93,7 @@ struct bgp_proto { struct neighbor *neigh; /* Neighbor entry 
corresponding to next_hop */ ip_addr local_addr; /* Address of the local end of the link to next_hop */ ip_addr source_addr; /* Address used as advertised next hop, usually local_addr */ + rtable *igp_table; /* Table used for recursive next hop lookups */ struct event *event; /* Event for respawning and shutting process */ struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index c4ed1032..a46431cb 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -24,7 +24,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, - BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST) + BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, TABLE) CF_GRAMMAR @@ -89,6 +89,7 @@ bgp_proto: | bgp_proto ROUTE LIMIT expr ';' { BGP_CFG->route_limit = $4; } | bgp_proto PASSIVE bool ';' { BGP_CFG->passive = $3; } | bgp_proto INTERPRET COMMUNITIES bool ';' { BGP_CFG->interpret_communities = $4; } + | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; } ; CF_ADDTO(dynamic_attr, BGP_ORIGIN diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 1e9d6465..514e878d 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -802,26 +802,26 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) } while (0) static inline int -bgp_get_nexthop(struct bgp_proto *bgp, rta *a) +bgp_set_next_hop(struct bgp_proto *p, rta *a) { - neighbor *neigh; - ip_addr nexthop; struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - ASSERT(nh); - nexthop = *(ip_addr *) nh->u.ptr->data; - neigh = neigh_find(&bgp->p, &nexthop, 0); - if (neigh) + ip_addr nexthop = *(ip_addr *) nh->u.ptr->data; + + if (!p->is_internal) /* FIXME 
better option + */ { - if (neigh->scope == SCOPE_HOST) - { - DBG("BGP: Loop!\n"); - return 0; - } + neighbor *ng = neigh_find(&p->p, &nexthop, 0) ? : p->neigh; + if (ng->scope == SCOPE_HOST) + return 0; + + a->dest = RTD_ROUTER; + a->gw = ng->addr; + a->iface = ng->iface; + a->hostentry = NULL; } else - neigh = bgp->neigh; - a->gw = neigh->addr; - a->iface = neigh->iface; + rta_set_recursive_next_hop(p->p.table, a, p->igp_table, &nexthop); + return 1; } @@ -853,7 +853,7 @@ bgp_do_rx_update(struct bgp_conn *conn, return; a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len); - if (a0 && nlri_len && bgp_get_nexthop(p, a0)) + if (a0 && nlri_len && bgp_set_next_hop(p, a0)) { a = rta_lookup(a0); while (nlri_len) diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 7fdf2733..e557097d 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -63,6 +63,7 @@ pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, e a.aflags = 0; a.eattrs = attrs; + a.hostentry = NULL; e = rte_get_temp(&a); e->net = nn; e->pflags = 0; @@ -120,7 +121,7 @@ pipe_start(struct proto *P) /* Clean up the secondary stats */ bzero(&p->peer_stats, sizeof(struct proto_stats)); - /* Lock the peer table, unlock is handled in proto_fell_down() */ + /* Lock the peer table, unlock is handled in pipe_cleanup() */ rt_lock_table(p->peer); /* Connect the protocol also to the peer routing table. 
*/ @@ -129,6 +130,13 @@ pipe_start(struct proto *P) return PS_UP; } +static void +pipe_cleanup(struct proto *P) +{ + struct pipe_proto *p = (struct pipe_proto *) P; + rt_unlock_table(p->peer); +} + static struct proto * pipe_init(struct proto_config *C) { @@ -185,6 +193,7 @@ struct protocol proto_pipe = { postconfig: pipe_postconfig, init: pipe_init, start: pipe_start, + cleanup: pipe_cleanup, reconfigure: pipe_reconfigure, get_status: pipe_get_status, }; diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 4c8676ef..9ae658d6 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -244,7 +244,6 @@ krt_set_start(struct krt_proto *x, int first UNUSED) static void krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan) { - rta a; rte *e; net *net; sockaddr dst, gate, mask; @@ -329,17 +328,12 @@ krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan) net = net_get(p->p.table, idst, pxlen); - bzero(&a, sizeof(a)); - - a.proto = &p->p; - a.source = RTS_INHERIT; - a.scope = SCOPE_UNIVERSE; - a.cast = RTC_UNICAST; - a.flags = a.aflags = 0; - a.from = IPA_NONE; - a.gw = IPA_NONE; - a.iface = NULL; - a.eattrs = NULL; + rta a = { + .proto = &p->p, + .source = RTS_INHERIT, + .scope = SCOPE_UNIVERSE, + .cast = RTC_UNICAST + }; /* reject/blackhole routes have also set RTF_GATEWAY, we wil check them first. 
*/ diff --git a/sysdep/linux/krt-scan.c b/sysdep/linux/krt-scan.c index feb128ef..8591607e 100644 --- a/sysdep/linux/krt-scan.c +++ b/sysdep/linux/krt-scan.c @@ -48,7 +48,6 @@ krt_parse_entry(byte *ent, struct krt_proto *p) int masklen; net *net; byte *iface = ent; - rta a; rte *e; if (sscanf(ent, "%*s\t%x\t%x\t%x\t%*d\t%*d\t%*d\t%x\t", &dest0, &gw0, &flags, &mask0) != 4) @@ -88,14 +87,12 @@ krt_parse_entry(byte *ent, struct krt_proto *p) net = net_get(p->p.table, dest, masklen); - a.proto = &p->p; - a.source = RTS_INHERIT; - a.scope = SCOPE_UNIVERSE; - a.cast = RTC_UNICAST; - a.flags = a.aflags = 0; - a.from = IPA_NONE; - a.iface = NULL; - a.eattrs = NULL; + rta a = { + .proto = &p->p, + .source = RTS_INHERIT, + .scope = SCOPE_UNIVERSE, + .cast = RTC_UNICAST + }; if (flags & RTF_GATEWAY) { diff --git a/sysdep/linux/netlink/netlink.c b/sysdep/linux/netlink/netlink.c index a10a2e9f..2dd0359b 100644 --- a/sysdep/linux/netlink/netlink.c +++ b/sysdep/linux/netlink/netlink.c @@ -570,7 +570,6 @@ nl_parse_route(struct nlmsghdr *h, int scan) struct rtattr *a[RTA_CACHEINFO+1]; int new = h->nlmsg_type == RTM_NEWROUTE; ip_addr dst; - rta ra; rte *e; net *net; u32 oif; @@ -655,15 +654,13 @@ nl_parse_route(struct nlmsghdr *h, int scan) } net = net_get(p->p.table, dst, i->rtm_dst_len); - ra.proto = &p->p; - ra.source = RTS_INHERIT; - ra.scope = SCOPE_UNIVERSE; - ra.cast = RTC_UNICAST; - ra.flags = ra.aflags = 0; - ra.from = IPA_NONE; - ra.gw = IPA_NONE; - ra.iface = NULL; - ra.eattrs = NULL; + + rta ra = { + .proto = &p->p, + .source = RTS_INHERIT, + .scope = SCOPE_UNIVERSE, + .cast = RTC_UNICAST + }; switch (i->rtm_type) {