Trie: Implement trie walking code

Trie walking allows enumeration of prefixes in a trie in the usual
lexicographic order. Optionally, trie enumeration can be restricted
to a chosen subnet (and its descendants).
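A minimal usage sketch (the helper name and the trie variable are illustrative;
only the TRIE_WALK()/TRIE_WALK_END macros and the walk functions come from this
commit):

    /* Sketch: log all prefixes stored in a trie, in lexicographic order */
    static void
    dump_trie(const struct f_trie *t)
    {
      TRIE_WALK(t, net, NULL)          /* NULL = no subnet restriction */
      {
        log(L_INFO "prefix %N", &net);
      }
      TRIE_WALK_END;
    }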
Ondrej Zajicek (work) 2021-11-19 18:04:32 +01:00
parent 71c18d9f53
commit 062e69bf52
3 changed files with 413 additions and 13 deletions

filter/data.h

@@ -140,7 +140,8 @@ struct f_tree {
void *data;
};
#define TRIE_STEP 4
#define TRIE_STACK_LENGTH 33
struct f_trie_node4
{
@@ -175,6 +176,16 @@ struct f_trie
struct f_trie_node root; /* Root trie node */
};
struct f_trie_walk_state
{
u8 ipv4;
u8 accept_length; /* Current inter-node prefix position */
u8 start_pos; /* Initial prefix position in stack[0] */
u8 local_pos; /* Current intra-node prefix position */
u8 stack_pos; /* Current node in stack below */
const struct f_trie_node *stack[TRIE_STACK_LENGTH];
};
struct f_tree *f_new_tree(void);
struct f_tree *build_tree(struct f_tree *);
const struct f_tree *find_tree(const struct f_tree *t, const struct f_val *val);
@@ -185,9 +196,19 @@ void tree_walk(const struct f_tree *t, void (*hook)(const struct f_tree *, void
struct f_trie *f_new_trie(linpool *lp, uint data_size);
void *trie_add_prefix(struct f_trie *t, const net_addr *n, uint l, uint h);
int trie_match_net(const struct f_trie *t, const net_addr *n);
void trie_walk_init(struct f_trie_walk_state *s, const struct f_trie *t, const net_addr *from);
int trie_walk_next(struct f_trie_walk_state *s, net_addr *net);
int trie_same(const struct f_trie *t1, const struct f_trie *t2);
void trie_format(const struct f_trie *t, buffer *buf);
#define TRIE_WALK(trie, net, from) ({ \
net_addr net; \
struct f_trie_walk_state tws_; \
trie_walk_init(&tws_, trie, from); \
while (trie_walk_next(&tws_, &net))
#define TRIE_WALK_END })
#define F_CMP_ERROR 999
const char *f_type_name(enum f_type t);
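When the iteration needs to be suspended and resumed (the "second approach"
mentioned in the trie.c comment below), the functions can be used directly.
A rough sketch, with the chunk size and function name being illustrative:

    /* Sketch: process at most 'limit' prefixes per call; returns 0 when the
     * walk is finished, 1 when there is more to do */
    static int
    walk_chunk(struct f_trie_walk_state *ws, int limit)
    {
      net_addr net;

      while (limit--)
      {
        if (!trie_walk_next(ws, &net))
          return 0;

        /* ... process 'net' ... */
      }

      return 1;
    }

    /* Usage: call trie_walk_init(&ws, trie, NULL) once, then call
     * walk_chunk(&ws, 128) repeatedly until it returns 0 */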

filter/trie.c

@@ -1,8 +1,8 @@
/*
* Filters: Trie for prefix sets
*
* (c) 2009--2021 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2009--2021 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
@@ -82,6 +82,24 @@
* - we are still on path and keep walking (node length < &plen)
*
* The walking code in trie_match_net() is structured according to these cases.
*
* Iteration over prefixes in a trie can be done using TRIE_WALK() macro, or
* directly using trie_walk_init() and trie_walk_next() functions. The second
* approach allows suspending the iteration and continuing it later.
* Prefixes are enumerated in the usual lexicographic order and may be
* restricted to a subset of the trie (all subnets of a specified prefix).
*
* Note that the trie walk does not reliably enumerate `implicit' prefixes
* defined by &low and &high fields in prefix patterns; it is supposed to be
* used on tries constructed from `explicit' prefixes (&low == &plen == &high
* in call to trie_add_prefix()).
*
* The trie walk has three basic state variables stored in the struct
* &f_trie_walk_state -- the current node in &stack[stack_pos], &accept_length
* for iteration over inter-node prefixes (non-branching prefixes on compressed
* path between the current node and its parent node, stored in the bitmap
* &accept of the current node) and &local_pos for iteration over intra-node
* prefixes (stored in the bitmap &local).
*/

#include "nest/bird.h"
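As an illustration of the subnet-restricted walk described in the comment
above (the prefix value and function name are just examples, assuming an
IPv4 trie):

    /* Sketch: enumerate only prefixes under 10.0.0.0/8 stored in 'trie' */
    static void
    dump_subnets(const struct f_trie *trie)
    {
      net_addr from;
      net_fill_ip4(&from, ip4_build(10, 0, 0, 0), 8);

      TRIE_WALK(trie, net, &from)
      {
        log(L_INFO "subnet %N", &net);
      }
      TRIE_WALK_END;
    }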
@@ -224,7 +242,7 @@ trie_amask_to_local(ip_addr px, ip_addr amask, uint nlen)
#define ADD_LOCAL(N,X,V) ({ uint v_ = (V); if (X) (N)->v4.local |= v_; else (N)->v6.local |= v_; })
#define GET_CHILD(N,X,I) ((X) ? (struct f_trie_node *) (N)->v4.c[I] : (struct f_trie_node *) (N)->v6.c[I])
static void *
@@ -312,7 +330,7 @@ trie_add_node(struct f_trie *t, uint plen, ip_addr px, uint local, uint l, uint
/* n->plen < plen and plen <= 32 (128) */
o = n;
n = GET_CHILD(n, v4, ipa_getbits(paddr, nlen, TRIE_STEP));
}
/* We add new tail node 'a' after node 'o' */
@@ -522,6 +540,225 @@ trie_match_net(const struct f_trie *t, const net_addr *n)
}
}
#define SAME_PREFIX(A,B,X,L) ((X) ? ip4_prefix_equal((A)->v4.addr, net4_prefix(B), (L)) : ip6_prefix_equal((A)->v6.addr, net6_prefix(B), (L)))
#define GET_NET_BITS(N,X,A,B) ((X) ? ip4_getbits(net4_prefix(N), (A), (B)) : ip6_getbits(net6_prefix(N), (A), (B)))
/**
* trie_walk_init
* @s: walk state
* @t: trie
* @net: optional subnet for walk
*
* Initialize walk state for subsequent walk through nodes of the trie @t by
* trie_walk_next(). The argument @net allows restricting the walk to a given
* subnet; otherwise a full walk over all nodes is used. This is done by finding
* the node at or below @net and the starting position in it.
*/
void
trie_walk_init(struct f_trie_walk_state *s, const struct f_trie *t, const net_addr *net)
{
*s = (struct f_trie_walk_state) {
.ipv4 = t->ipv4,
.accept_length = 0,
.start_pos = 1,
.local_pos = 1,
.stack_pos = 0,
.stack[0] = &t->root
};
if (!net)
return;
/* We want to find node of level at least plen */
int plen = ROUND_DOWN_POW2(net->pxlen, TRIE_STEP);
const struct f_trie_node *n = &t->root;
const int v4 = t->ipv4;
while (n)
{
int nlen = v4 ? n->v4.plen : n->v6.plen;
/* We are out of path */
if (!SAME_PREFIX(n, net, v4, MIN(net->pxlen, nlen)))
break;
/* We found final node */
if (nlen >= plen)
{
if (nlen == plen)
{
/* Find proper local_pos, while accept_length is not used */
int step = net->pxlen - plen;
s->start_pos = s->local_pos = (1u << step) + GET_NET_BITS(net, v4, plen, step);
s->accept_length = plen;
}
else
{
/* Start from pos 1 in local node, but first try accept mask */
s->accept_length = net->pxlen;
}
s->stack[0] = n;
return;
}
/* Choose child */
n = GET_CHILD(n, v4, GET_NET_BITS(net, v4, nlen, TRIE_STEP));
}
s->stack[0] = NULL;
return;
}
#define GET_ACCEPT_BIT(N,X,B) ((X) ? ip4_getbit((N)->v4.accept, (B)) : ip6_getbit((N)->v6.accept, (B)))
#define GET_LOCAL_BIT(N,X,B) (((X) ? (N)->v4.local : (N)->v6.local) & (1u << (B)))
/**
* trie_walk_next
* @s: walk state
* @net: return value
*
* Find the next prefix in the trie walk and return it in the buffer @net.
* Prefixes are walked in the usual lexicographic order and may be restricted
* to a subset of the trie during walk setup by trie_walk_init(). Note that the
* trie walk does not iterate reliably over 'implicit' prefixes defined by &low
* and &high fields in prefix patterns; it is supposed to be used on tries
* constructed from 'explicit' prefixes (&low == &plen == &high in call to
* trie_add_prefix()).
*
* Result: 1 if the next prefix was found, 0 for the end of walk.
*/
int
trie_walk_next(struct f_trie_walk_state *s, net_addr *net)
{
const struct f_trie_node *n = s->stack[s->stack_pos];
int len = s->accept_length;
int pos = s->local_pos;
int v4 = s->ipv4;
/*
* The walk has three basic state variables -- n, len and pos. In each node n,
* we first walk superprefixes (by len in &accept bitmask), and then we walk
* internal positions (by pos in &local bitmask). These positions are:
*
* 1
* 2 3
* 4 5 6 7
* 8 9 A B C D E F
*
* We walk them depth-first, including virtual positions 10-1F that are
* equivalent of position 1 in child nodes 0-F.
*/
if (!n)
{
memset(net, 0, v4 ? sizeof(net_addr_ip4) : sizeof(net_addr_ip6));
return 0;
}
next_node:;
/* Current node prefix length */
int nlen = v4 ? n->v4.plen : n->v6.plen;
/* First, check for accept prefix */
for (; len < nlen; len++)
if (GET_ACCEPT_BIT(n, v4, len - 1))
{
if (v4)
net_fill_ip4(net, ip4_and(n->v4.addr, ip4_mkmask(len)), len);
else
net_fill_ip6(net, ip6_and(n->v6.addr, ip6_mkmask(len)), len);
s->local_pos = pos;
s->accept_length = len + 1;
return 1;
}
next_pos:
/* Bottom of this node */
if (pos >= (1 << TRIE_STEP))
{
const struct f_trie_node *child = GET_CHILD(n, v4, pos - (1 << TRIE_STEP));
int dir = 0;
/* No child node */
if (!child)
{
/* Step up until return from left child (pos is even) */
do
{
/* Step up from start node */
if ((s->stack_pos == 0) && (pos == s->start_pos))
{
s->stack[0] = NULL;
memset(net, 0, v4 ? sizeof(net_addr_ip4) : sizeof(net_addr_ip6));
return 0;
}
/* Top of this node */
if (pos == 1)
{
ASSERT(s->stack_pos);
const struct f_trie_node *old = n;
/* Move to parent node */
s->stack_pos--;
n = s->stack[s->stack_pos];
nlen = v4 ? n->v4.plen : n->v6.plen;
pos = v4 ?
ip4_getbits(old->v4.addr, nlen, TRIE_STEP) :
ip6_getbits(old->v6.addr, nlen, TRIE_STEP);
pos += (1 << TRIE_STEP);
len = nlen;
ASSERT(GET_CHILD(n, v4, pos - (1 << TRIE_STEP)) == old);
}
/* Step up */
dir = pos % 2;
pos = pos / 2;
}
while (dir);
/* Continue with step down to the right child */
pos = 2 * pos + 1;
goto next_pos;
}
/* Move to child node */
pos = 1;
len = nlen + TRIE_STEP;
s->stack_pos++;
n = s->stack[s->stack_pos] = child;
goto next_node;
}
/* Check for local prefix */
if (GET_LOCAL_BIT(n, v4, pos))
{
/* Convert pos to address of local network */
int x = (pos >= 2) + (pos >= 4) + (pos >= 8);
int y = pos & ((1u << x) - 1);
if (v4)
net_fill_ip4(net, !x ? n->v4.addr : ip4_setbits(n->v4.addr, nlen + x - 1, y), nlen + x);
else
net_fill_ip6(net, !x ? n->v6.addr : ip6_setbits(n->v6.addr, nlen + x - 1, y), nlen + x);
s->local_pos = 2 * pos;
s->accept_length = len;
return 1;
}
/* Step down */
pos = 2 * pos;
goto next_pos;
}
static int
trie_node_same4(const struct f_trie_node4 *t1, const struct f_trie_node4 *t2)
{
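The intra-node position numbering used in trie_walk_next() (positions 1, 2-3,
4-7 and 8-15 encode the node prefix extended by 0, 1, 2 or 3 bits) can be
summarized by a small standalone helper; this only restates the arithmetic
from the function above and is not part of the patch:

    /* Illustration: decode an intra-node position into the number of extra
     * bits below the node prefix and their value, e.g. pos 1 -> +0 bits,
     * pos 5 -> +2 bits with value 01, pos 0xB -> +3 bits with value 011 */
    static inline void
    decode_local_pos(uint pos, uint *extra_bits, uint *value)
    {
      *extra_bits = (pos >= 2) + (pos >= 4) + (pos >= 8);
      *value = pos & ((1u << *extra_bits) - 1);
    }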

filter/trie_test.c

@@ -45,6 +45,13 @@ get_exp_random(void)
return n;
}
static int
compare_prefixes(const void *a, const void *b)
{
return net_compare(&((const struct f_prefix *) a)->net,
&((const struct f_prefix *) b)->net);
}
static inline int
matching_ip4_nets(const net_addr_ip4 *a, const net_addr_ip4 *b)
{
@@ -106,11 +113,15 @@ get_random_net(net_addr *net, int v6)
}
static void
get_random_prefix(struct f_prefix *px, int v6, int tight)
{
get_random_net(&px->net, v6);
if (tight)
{
px->lo = px->hi = px->net.pxlen;
}
else if (bt_random() % 2)
{
px->lo = 0;
px->hi = px->net.pxlen;
@@ -238,7 +249,7 @@ get_outer_net(net_addr *net, const struct f_prefix *src)
}
static list *
make_random_prefix_list(linpool *lp, int num, int v6, int tight)
{
list *prefixes = lp_allocz(lp, sizeof(struct f_prefix_node));
init_list(prefixes);
@@ -246,7 +257,7 @@ make_random_prefix_list(linpool *lp, int num, int v6)
for (int i = 0; i < num; i++)
{
struct f_prefix_node *px = lp_allocz(lp, sizeof(struct f_prefix_node));
get_random_prefix(&px->prefix, v6, tight);
add_tail(prefixes, &px->n);
char buf[64];
@@ -429,7 +440,7 @@ t_match_random_net(void)
linpool *lp = lp_new_default(&root_pool);
for (int round = 0; round < TESTS_NUM; round++)
{
list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6, 0);
struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes);
for (int i = 0; i < PREFIX_TESTS_NUM; i++)
@@ -457,7 +468,7 @@ t_match_inner_net(void)
linpool *lp = lp_new_default(&root_pool);
for (int round = 0; round < TESTS_NUM; round++)
{
list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6, 0);
struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes);
struct f_prefix_node *n = HEAD(*prefixes);
@@ -488,7 +499,7 @@ t_match_outer_net(void)
linpool *lp = lp_new_default(&root_pool);
for (int round = 0; round < TESTS_NUM; round++)
{
list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6, 0);
struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes);
struct f_prefix_node *n = HEAD(*prefixes);
@@ -613,7 +624,7 @@ t_trie_same(void)
linpool *lp = lp_new_default(&root_pool);
for (int round = 0; round < TESTS_NUM*4; round++)
{
list *prefixes = make_random_prefix_list(lp, 100 * PREFIXES_NUM, v6, 0);
struct f_trie *trie1 = f_new_trie(lp, 0);
struct f_trie *trie2 = f_new_trie(lp, 0);
@@ -630,6 +641,136 @@ t_trie_same(void)
lp_flush(lp);
}
bt_bird_cleanup();
return 1;
}
static inline void
log_networks(const net_addr *a, const net_addr *b)
{
if (bt_verbose >= BT_VERBOSE_ABSOLUTELY_ALL)
{
char buf0[64];
char buf1[64];
bt_format_net(buf0, 64, a);
bt_format_net(buf1, 64, b);
bt_debug("Found %s expected %s\n", buf0, buf1);
}
}
static int
t_trie_walk(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
linpool *lp = lp_new_default(&root_pool);
for (int round = 0; round < TESTS_NUM*8; round++)
{
int level = round / TESTS_NUM;
int v6 = level % 2;
int num = PREFIXES_NUM * (int[]){1, 10, 100, 1000}[level / 2];
int pos = 0, end = 0;
list *prefixes = make_random_prefix_list(lp, num, v6, 1);
struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes);
struct f_prefix *pxset = malloc((num + 1) * sizeof(struct f_prefix));
struct f_prefix_node *n;
WALK_LIST(n, *prefixes)
pxset[pos++] = n->prefix;
memset(&pxset[pos], 0, sizeof (struct f_prefix));
qsort(pxset, num, sizeof(struct f_prefix), compare_prefixes);
/* Full walk */
bt_debug("Full walk (round %d, %d nets)\n", round, num);
pos = 0;
TRIE_WALK(trie, net, NULL)
{
log_networks(&net, &pxset[pos].net);
bt_assert(net_equal(&net, &pxset[pos].net));
/* Skip possible duplicates */
while (net_equal(&pxset[pos].net, &pxset[pos + 1].net))
pos++;
pos++;
}
TRIE_WALK_END;
bt_assert(pos == num);
bt_debug("Full walk done\n");
/* Prepare net for subnet walk - start with random prefix */
pos = bt_random() % num;
end = pos + (int[]){2, 2, 3, 4}[level / 2];
end = MIN(end, num);
struct f_prefix from = pxset[pos];
/* Find a common superprefix to several subsequent prefixes */
for (; pos < end; pos++)
{
if (net_equal(&from.net, &pxset[pos].net))
continue;
int common = !v6 ?
ip4_pxlen(net4_prefix(&from.net), net4_prefix(&pxset[pos].net)) :
ip6_pxlen(net6_prefix(&from.net), net6_prefix(&pxset[pos].net));
from.net.pxlen = MIN(from.net.pxlen, common);
if (!v6)
((net_addr_ip4 *) &from.net)->prefix =
ip4_and(net4_prefix(&from.net), net4_prefix(&pxset[pos].net));
else
((net_addr_ip6 *) &from.net)->prefix =
ip6_and(net6_prefix(&from.net), net6_prefix(&pxset[pos].net));
}
/* Fix irrelevant bits */
if (!v6)
((net_addr_ip4 *) &from.net)->prefix =
ip4_and(net4_prefix(&from.net), ip4_mkmask(net4_pxlen(&from.net)));
else
((net_addr_ip6 *) &from.net)->prefix =
ip6_and(net6_prefix(&from.net), ip6_mkmask(net6_pxlen(&from.net)));
/* Find initial position for final prefix */
for (pos = 0; pos < num; pos++)
if (compare_prefixes(&pxset[pos], &from) >= 0)
break;
int p0 = pos;
char buf0[64];
bt_format_net(buf0, 64, &from.net);
bt_debug("Subnet walk for %s (round %d, %d nets)\n", buf0, round, num);
/* Subnet walk */
TRIE_WALK(trie, net, &from.net)
{
log_networks(&net, &pxset[pos].net);
bt_assert(net_equal(&net, &pxset[pos].net));
bt_assert(net_in_netX(&net, &from.net));
/* Skip possible duplicates */
while (net_equal(&pxset[pos].net, &pxset[pos + 1].net))
pos++;
pos++;
}
TRIE_WALK_END;
bt_assert((pos == num) || !net_in_netX(&pxset[pos].net, &from.net));
bt_debug("Subnet walk done for %s (found %d nets)\n", buf0, pos - p0);
lp_flush(lp);
}
bt_bird_cleanup();
return 1;
}
@@ -642,6 +783,7 @@ main(int argc, char *argv[])
bt_test_suite(t_match_inner_net, "Testing random inner prefix matching");
bt_test_suite(t_match_outer_net, "Testing random outer prefix matching");
bt_test_suite(t_trie_same, "A trie filled forward should be same with a trie filled backward.");
bt_test_suite(t_trie_walk, "Testing TRIE_WALK() on random tries");
// bt_test_suite(t_bench_trie_datasets_subset, "Benchmark tries from datasets by random subset of nets");
// bt_test_suite(t_bench_trie_datasets_random, "Benchmark tries from datasets by generated addresses");