From 836a87b8acd5da40bde6b89702090c6616e06dfb Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 29 Nov 2021 19:23:42 +0100 Subject: [PATCH] Nest: Attach prefix trie to rtable for faster LPM and interval queries Attach a prefix trie to IP/VPN/ROA tables. Use it for net_route() and net_roa_check(). This leads to 3-5x speedups for IPv4 and 5-10x speedup for IPv6 of these calls. TODO: - Rebuild the trie during rt_prune_table() - Better way to avoid trie_add_prefix() in net_get() for existing tables - Make it configurable (?) --- lib/net.h | 1 + nest/route.h | 9 +- nest/rt-fib.c | 2 +- nest/rt-table.c | 314 ++++++++++++++++++++++++++++++++++++++------ proto/babel/babel.c | 2 +- 5 files changed, 281 insertions(+), 47 deletions(-) diff --git a/lib/net.h b/lib/net.h index 8eb4c7b9..9f4a00ad 100644 --- a/lib/net.h +++ b/lib/net.h @@ -38,6 +38,7 @@ #define NB_IP (NB_IP4 | NB_IP6) #define NB_VPN (NB_VPN4 | NB_VPN6) +#define NB_ROA (NB_ROA4 | NB_ROA6) #define NB_FLOW (NB_FLOW4 | NB_FLOW6) #define NB_DEST (NB_IP | NB_IP6_SADR | NB_VPN | NB_MPLS) #define NB_ANY 0xffffffff diff --git a/nest/route.h b/nest/route.h index f5fc9e31..fa87e22c 100644 --- a/nest/route.h +++ b/nest/route.h @@ -20,7 +20,9 @@ struct proto; struct rte_src; struct symbol; struct timer; +struct fib; struct filter; +struct f_trie; struct cli; /* @@ -49,7 +51,7 @@ struct fib_iterator { /* See lib/slists.h for an explanation */ uint hash; }; -typedef void (*fib_init_fn)(void *); +typedef void (*fib_init_fn)(struct fib *, void *); struct fib { pool *fib_pool; /* Pool holding all our data */ @@ -149,6 +151,7 @@ struct rtable_config { int gc_min_time; /* Minimum time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ byte internal; /* Internal table of a protocol */ + byte trie_used; /* Rtable has attached trie */ btime min_settle_time; /* Minimum settle time for notifications */ btime max_settle_time; /* Maximum settle time for notifications */ }; @@ -158,6 +161,7 @@ typedef struct rtable { node n; /* Node in list of all tables */ pool *rp; /* Resource pool to allocate everything from, including itself */ struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ char *name; /* Name of this table */ list channels; /* List of attached channels (struct channel) */ uint addr_type; /* Type of address data stored in table (NET_*) */ @@ -324,7 +328,8 @@ static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) static inline net *net_find_valid(rtable *tab, const net_addr *addr) { net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? n : NULL; } static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -void *net_route(rtable *tab, const net_addr *n); +net *net_get(rtable *tab, const net_addr *addr); +net *net_route(rtable *tab, const net_addr *n); int net_roa_check(rtable *tab, const net_addr *n, u32 asn); rte *rte_find(net *net, struct rte_src *src); rte *rte_get_temp(struct rta *); diff --git a/nest/rt-fib.c b/nest/rt-fib.c index a7f70371..1690a8f6 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -331,7 +331,7 @@ fib_get(struct fib *f, const net_addr *a) memset(b, 0, f->node_offset); if (f->init) - f->init(b); + f->init(f, b); if (f->entries++ > f->entries_max) fib_rehash(f, HASH_HI_STEP); diff --git a/nest/rt-table.c b/nest/rt-table.c index 390b3277..f4f25497 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -64,39 +64,178 @@ static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); -/* Like fib_route(), but skips empty net entries */ -static inline void * -net_route_ip4(rtable *t, net_addr_ip4 *n) +static void +net_init_with_trie(struct fib *f, void *N) { - net *r; + rtable *tab = SKIP_BACK(rtable, fib, f); + net *n = N; - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + if (tab->trie) + trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); +} + +static inline net * +net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n) { - n->pxlen--; - ip4_clrbit(&n->prefix, n->pxlen); + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px) + { + net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n) + { + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline void * +net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen); + net *best = NULL; + int best_pxlen = 0; + + /* We need to do dst first matching. Since sadr addresses are hashed on dst + prefix only, find the hash table chain and go through it to find the + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_ip6_sadr *a = (void *) fn->addr; + + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && + (a->src_pxlen >= best_pxlen)) + { + best = fib_node_to_user(&t->fib, fn); + best_pxlen = a->src_pxlen; + } + } + + if (best) + return best; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) +{ + net_addr_ip4 n; + net_copy_ip4(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) +{ + net_addr_vpn4 n; + net_copy_vpn4(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) +{ + net_addr_ip6 n; + net_copy_ip6(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) +{ + net_addr_vpn6 n; + net_copy_vpn6(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); } return r; } static inline void * -net_route_ip6(rtable *t, net_addr_ip6 *n) +net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) { - net *r; - - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) - { - n->pxlen--; - ip6_clrbit(&n->prefix, n->pxlen); - } - - return r; -} - -static inline void * -net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) -{ - struct fib_node *fn; + net_addr_ip6_sadr n; + net_copy_ip6_sadr(&n, n0); while (1) { @@ -105,13 +244,13 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) /* We need to do dst first matching. Since sadr addresses are hashed on dst prefix only, find the hash table chain and go through it to find the - match with the smallest matching src prefix. */ - for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next) + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) { net_addr_ip6_sadr *a = (void *) fn->addr; - if (net_equal_dst_ip6_sadr(n, a) && - net_in_net_src_ip6_sadr(n, a) && + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && (a->src_pxlen >= best_pxlen)) { best = fib_node_to_user(&t->fib, fn); @@ -122,38 +261,52 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) if (best) return best; - if (!n->dst_pxlen) + if (!n.dst_pxlen) break; - n->dst_pxlen--; - ip6_clrbit(&n->dst_prefix, n->dst_pxlen); + n.dst_pxlen--; + ip6_clrbit(&n.dst_prefix, n.dst_pxlen); } return NULL; } -void * +net * net_route(rtable *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); - net_addr *n0 = alloca(n->length); - net_copy(n0, n); - switch (n->type) { case NET_IP4: + if (tab->trie) + return net_route_ip4_trie(tab, (net_addr_ip4 *) n); + else + return net_route_ip4_fib (tab, (net_addr_ip4 *) n); + case NET_VPN4: - case NET_ROA4: - return net_route_ip4(tab, (net_addr_ip4 *) n0); + if (tab->trie) + return net_route_vpn4_trie(tab, (net_addr_vpn4 *) n); + else + return net_route_vpn4_fib (tab, (net_addr_vpn4 *) n); case NET_IP6: + if (tab->trie) + return net_route_ip6_trie(tab, (net_addr_ip6 *) n); + else + return net_route_ip6_fib (tab, (net_addr_ip6 *) n); + case NET_VPN6: - case NET_ROA6: - return net_route_ip6(tab, (net_addr_ip6 *) n0); + if (tab->trie) + return net_route_vpn6_trie(tab, (net_addr_vpn6 *) n); + else + return net_route_vpn6_fib (tab, (net_addr_vpn6 *) n); case NET_IP6_SADR: - return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0); + if (tab->trie) + return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n); + else + return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n); default: return NULL; @@ -162,7 +315,35 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP4(tab->trie, px, px0) + { + net_addr_roa4 roa0 = NET_ADDR_ROA4(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa4 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa4(roa, &roa0) && rte_is_valid(r->routes)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -194,7 +375,35 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP6(tab->trie, px, px0) + { + net_addr_roa6 roa0 = NET_ADDR_ROA6(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa6 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa6(roa, &roa0) && rte_is_valid(r->routes)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -244,9 +453,19 @@ int net_roa_check(rtable *tab, const net_addr *n, u32 asn) { if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) - return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); + else + return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); + } else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) - return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + else + return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + } else return ROA_UNKNOWN; /* Should not happen */ } @@ -1940,6 +2159,14 @@ rt_setup(pool *pp, struct rtable_config *cf) fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + if (cf->trie_used) + { + t->trie = f_new_trie(lp_new_default(p), 0); + t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4); + + t->fib.init = net_init_with_trie; + } + if (!(t->internal = cf->internal)) { init_list(&t->channels); @@ -2352,6 +2579,7 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_min_time = 5; c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->trie_used = net_val_match(addr_type, NB_IP | NB_VPN | NB_ROA | NB_IP6_SADR); add_tail(&new_config->tables, &c->n); diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 1e87212c..e43818f5 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -63,7 +63,7 @@ static inline void babel_iface_kick_timer(struct babel_iface *ifa); */ static void -babel_init_entry(void *E) +babel_init_entry(struct fib *f UNUSED, void *E) { struct babel_entry *e = E;