Nest: Implement locking of prefix tries during walks

The prune loop may rebuild the prefix trie and therefore invalidate
walk state for asynchronous walks (used in 'show route in' cmd). Fix it
by adding locking that keeps the old trie in memory until current walks
are done.

In the future this could be improved by rebuilding trie walk states
(by a lookup of the last found prefix) after the prefix trie rebuild.
This commit is contained in:
Ondrej Zajicek (work) 2022-02-04 05:34:02 +01:00
parent de6318f70a
commit 5a89edc6fd
3 changed files with 95 additions and 2 deletions

View file

@ -191,6 +191,9 @@ typedef struct rtable {
struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
struct f_trie *trie_new; /* New prefix trie defined during pruning */
struct f_trie *trie_old; /* Old prefix trie waiting to be freed */
u32 trie_lock_count; /* Prefix trie locked by walks */
u32 trie_old_lock_count; /* Old prefix trie locked by walks */
list subscribers; /* Subscribers for notifications */
struct timer *settle_timer; /* Settle time for notifications */
@ -332,6 +335,8 @@ void rt_preconfig(struct config *);
void rt_commit(struct config *new, struct config *old);
void rt_lock_table(rtable *);
void rt_unlock_table(rtable *);
struct f_trie * rt_lock_trie(rtable *tab);
void rt_unlock_trie(rtable *tab, struct f_trie *trie);
void rt_subscribe(rtable *tab, struct rt_subscription *s);
void rt_unsubscribe(struct rt_subscription *s);
void rt_flowspec_link(rtable *src, rtable *dst);
@ -405,6 +410,7 @@ struct rt_show_data {
struct rt_show_data_rtable *last_table; /* Last table in output */
struct fib_iterator fit; /* Iterator over networks in table */
struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
struct f_trie *walk_lock; /* Locked trie for walking */
int verbose, tables_defined_by;
const struct filter *filter;
struct proto *show_protocol;

View file

@ -222,6 +222,9 @@ rt_show_cleanup(struct cli *c)
if (d->table_open && !d->trie_walk)
fit_get(&d->tab->table->fib, &d->fit);
if (d->walk_lock)
rt_unlock_trie(d->tab->table, d->walk_lock);
/* Unlock referenced tables */
WALK_LIST(tab, d->tables)
rt_unlock_table(tab->table);
@ -257,7 +260,10 @@ rt_show_cont(struct cli *c)
d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state));
if (d->trie_walk)
{
d->walk_lock = rt_lock_trie(tab);
trie_walk_init(d->walk_state, tab->trie, d->addr);
}
else
FIB_ITERATE_INIT(&d->fit, &tab->fib);
@ -288,6 +294,9 @@ rt_show_cont(struct cli *c)
if (!--max)
return;
}
rt_unlock_trie(tab, d->walk_lock);
d->walk_lock = NULL;
}
else
{

View file

@ -2471,7 +2471,19 @@ again:
if (tab->trie_new)
{
/* Finish prefix trie pruning */
rfree(tab->trie->lp);
if (!tab->trie_lock_count)
{
rfree(tab->trie->lp);
}
else
{
ASSERT(!tab->trie_old);
tab->trie_old = tab->trie;
tab->trie_old_lock_count = tab->trie_lock_count;
tab->trie_lock_count = 0;
}
tab->trie = tab->trie_new;
tab->trie_new = NULL;
tab->prune_trie = 0;
@ -2479,7 +2491,7 @@ again:
else
{
/* Schedule prefix trie pruning */
if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
{
/* state change 0->1, 2->3 */
tab->prune_state |= 1;
@ -2504,6 +2516,72 @@ again:
return;
}
/**
 * rt_lock_trie - lock a prefix trie of a routing table
 * @tab: routing table whose prefix trie should be locked
 *
 * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
 * structures. Asynchronous walks therefore take a lock on the trie with this
 * function. The prune loop may still replace the trie, but the old one is kept
 * in memory until every walk has released it via rt_unlock_trie().
 *
 * Returns the trie that was locked; the caller must hand the same pointer
 * back to rt_unlock_trie() when the walk is finished.
 */
struct f_trie *
rt_lock_trie(rtable *tab)
{
  struct f_trie *trie = tab->trie;

  ASSERT(trie);

  tab->trie_lock_count++;
  return trie;
}
/**
 * rt_unlock_trie - unlock a prefix trie of a routing table
 * @tab: routing table whose prefix trie should be unlocked
 * @trie: value returned by the matching rt_lock_trie()
 *
 * Releases a trie reference taken by rt_lock_trie() once a walk over the trie
 * is finished. Dropping the last reference to an already-replaced (old) trie
 * frees it and may kick off a prefix trie pruning that was postponed while
 * the old trie was still referenced.
 */
void
rt_unlock_trie(rtable *tab, struct f_trie *trie)
{
  ASSERT(trie);

  if (trie == tab->trie)
  {
    /* Drop one reference to the current prefix trie */
    ASSERT(tab->trie_lock_count);
    tab->trie_lock_count--;
    return;
  }

  if (trie != tab->trie_old)
  {
    /* Neither the current nor the old trie - caller passed a bogus pointer */
    log(L_BUG "Invalid arg to rt_unlock_trie()");
    return;
  }

  /* Drop one reference to the old prefix trie */
  ASSERT(tab->trie_old_lock_count);
  tab->trie_old_lock_count--;

  if (tab->trie_old_lock_count)
    return;

  /* Last walk is done - the old prefix trie is no longer needed */
  rfree(tab->trie_old->lp);
  tab->trie_old = NULL;

  /* Kick the prefix trie pruning that was postponed while the old trie lived */
  if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
  {
    tab->prune_trie = 1;
    rt_schedule_prune(tab);
  }
}
void
rt_preconfig(struct config *c)
{