Nest: Implement locking of prefix tries during walks
The prune loop may may rebuild the prefix trie and therefore invalidate walk state for asynchronous walks (used in 'show route in' cmd). Fix it by adding locking that keeps the old trie in memory until current walks are done. In future this could be improved by rebuilding trie walk states (by lookup for last found prefix) after the prefix trie rebuild.
This commit is contained in:
parent
de6318f70a
commit
5a89edc6fd
3 changed files with 95 additions and 2 deletions
|
@ -191,6 +191,9 @@ typedef struct rtable {
|
||||||
struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
|
struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
|
||||||
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
|
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
|
||||||
struct f_trie *trie_new; /* New prefix trie defined during pruning */
|
struct f_trie *trie_new; /* New prefix trie defined during pruning */
|
||||||
|
struct f_trie *trie_old; /* Old prefix trie waiting to be freed */
|
||||||
|
u32 trie_lock_count; /* Prefix trie locked by walks */
|
||||||
|
u32 trie_old_lock_count; /* Old prefix trie locked by walks */
|
||||||
|
|
||||||
list subscribers; /* Subscribers for notifications */
|
list subscribers; /* Subscribers for notifications */
|
||||||
struct timer *settle_timer; /* Settle time for notifications */
|
struct timer *settle_timer; /* Settle time for notifications */
|
||||||
|
@ -332,6 +335,8 @@ void rt_preconfig(struct config *);
|
||||||
void rt_commit(struct config *new, struct config *old);
|
void rt_commit(struct config *new, struct config *old);
|
||||||
void rt_lock_table(rtable *);
|
void rt_lock_table(rtable *);
|
||||||
void rt_unlock_table(rtable *);
|
void rt_unlock_table(rtable *);
|
||||||
|
struct f_trie * rt_lock_trie(rtable *tab);
|
||||||
|
void rt_unlock_trie(rtable *tab, struct f_trie *trie);
|
||||||
void rt_subscribe(rtable *tab, struct rt_subscription *s);
|
void rt_subscribe(rtable *tab, struct rt_subscription *s);
|
||||||
void rt_unsubscribe(struct rt_subscription *s);
|
void rt_unsubscribe(struct rt_subscription *s);
|
||||||
void rt_flowspec_link(rtable *src, rtable *dst);
|
void rt_flowspec_link(rtable *src, rtable *dst);
|
||||||
|
@ -405,6 +410,7 @@ struct rt_show_data {
|
||||||
struct rt_show_data_rtable *last_table; /* Last table in output */
|
struct rt_show_data_rtable *last_table; /* Last table in output */
|
||||||
struct fib_iterator fit; /* Iterator over networks in table */
|
struct fib_iterator fit; /* Iterator over networks in table */
|
||||||
struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
|
struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
|
||||||
|
struct f_trie *walk_lock; /* Locked trie for walking */
|
||||||
int verbose, tables_defined_by;
|
int verbose, tables_defined_by;
|
||||||
const struct filter *filter;
|
const struct filter *filter;
|
||||||
struct proto *show_protocol;
|
struct proto *show_protocol;
|
||||||
|
|
|
@ -222,6 +222,9 @@ rt_show_cleanup(struct cli *c)
|
||||||
if (d->table_open && !d->trie_walk)
|
if (d->table_open && !d->trie_walk)
|
||||||
fit_get(&d->tab->table->fib, &d->fit);
|
fit_get(&d->tab->table->fib, &d->fit);
|
||||||
|
|
||||||
|
if (d->walk_lock)
|
||||||
|
rt_unlock_trie(d->tab->table, d->walk_lock);
|
||||||
|
|
||||||
/* Unlock referenced tables */
|
/* Unlock referenced tables */
|
||||||
WALK_LIST(tab, d->tables)
|
WALK_LIST(tab, d->tables)
|
||||||
rt_unlock_table(tab->table);
|
rt_unlock_table(tab->table);
|
||||||
|
@ -257,7 +260,10 @@ rt_show_cont(struct cli *c)
|
||||||
d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state));
|
d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state));
|
||||||
|
|
||||||
if (d->trie_walk)
|
if (d->trie_walk)
|
||||||
|
{
|
||||||
|
d->walk_lock = rt_lock_trie(tab);
|
||||||
trie_walk_init(d->walk_state, tab->trie, d->addr);
|
trie_walk_init(d->walk_state, tab->trie, d->addr);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
FIB_ITERATE_INIT(&d->fit, &tab->fib);
|
FIB_ITERATE_INIT(&d->fit, &tab->fib);
|
||||||
|
|
||||||
|
@ -288,6 +294,9 @@ rt_show_cont(struct cli *c)
|
||||||
if (!--max)
|
if (!--max)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rt_unlock_trie(tab, d->walk_lock);
|
||||||
|
d->walk_lock = NULL;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -2471,7 +2471,19 @@ again:
|
||||||
if (tab->trie_new)
|
if (tab->trie_new)
|
||||||
{
|
{
|
||||||
/* Finish prefix trie pruning */
|
/* Finish prefix trie pruning */
|
||||||
rfree(tab->trie->lp);
|
|
||||||
|
if (!tab->trie_lock_count)
|
||||||
|
{
|
||||||
|
rfree(tab->trie->lp);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ASSERT(!tab->trie_old);
|
||||||
|
tab->trie_old = tab->trie;
|
||||||
|
tab->trie_old_lock_count = tab->trie_lock_count;
|
||||||
|
tab->trie_lock_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
tab->trie = tab->trie_new;
|
tab->trie = tab->trie_new;
|
||||||
tab->trie_new = NULL;
|
tab->trie_new = NULL;
|
||||||
tab->prune_trie = 0;
|
tab->prune_trie = 0;
|
||||||
|
@ -2479,7 +2491,7 @@ again:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Schedule prefix trie pruning */
|
/* Schedule prefix trie pruning */
|
||||||
if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
|
if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
|
||||||
{
|
{
|
||||||
/* state change 0->1, 2->3 */
|
/* state change 0->1, 2->3 */
|
||||||
tab->prune_state |= 1;
|
tab->prune_state |= 1;
|
||||||
|
@ -2504,6 +2516,72 @@ again:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* rt_lock_trie - lock a prefix trie of a routing table
|
||||||
|
* @tab: routing table with prefix trie to be locked
|
||||||
|
*
|
||||||
|
* The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
|
||||||
|
* structures. Therefore, asynchronous walks should lock the prefix trie using
|
||||||
|
* this function. That allows the prune loop to rebuild the trie, but postpones
|
||||||
|
* its freeing until all walks are done (unlocked by rt_unlock_trie()).
|
||||||
|
*
|
||||||
|
* Return a current trie that will be locked, the value should be passed back to
|
||||||
|
* rt_unlock_trie() for unlocking.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
struct f_trie *
|
||||||
|
rt_lock_trie(rtable *tab)
|
||||||
|
{
|
||||||
|
ASSERT(tab->trie);
|
||||||
|
|
||||||
|
tab->trie_lock_count++;
|
||||||
|
return tab->trie;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* rt_unlock_trie - unlock a prefix trie of a routing table
|
||||||
|
* @tab: routing table with prefix trie to be locked
|
||||||
|
* @trie: value returned by matching rt_lock_trie()
|
||||||
|
*
|
||||||
|
* Done for trie locked by rt_lock_trie() after walk over the trie is done.
|
||||||
|
* It may free the trie and schedule next trie pruning.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
rt_unlock_trie(rtable *tab, struct f_trie *trie)
|
||||||
|
{
|
||||||
|
ASSERT(trie);
|
||||||
|
|
||||||
|
if (trie == tab->trie)
|
||||||
|
{
|
||||||
|
/* Unlock the current prefix trie */
|
||||||
|
ASSERT(tab->trie_lock_count);
|
||||||
|
tab->trie_lock_count--;
|
||||||
|
}
|
||||||
|
else if (trie == tab->trie_old)
|
||||||
|
{
|
||||||
|
/* Unlock the old prefix trie */
|
||||||
|
ASSERT(tab->trie_old_lock_count);
|
||||||
|
tab->trie_old_lock_count--;
|
||||||
|
|
||||||
|
/* Free old prefix trie that is no longer needed */
|
||||||
|
if (!tab->trie_old_lock_count)
|
||||||
|
{
|
||||||
|
rfree(tab->trie_old->lp);
|
||||||
|
tab->trie_old = NULL;
|
||||||
|
|
||||||
|
/* Kick prefix trie pruning that was postponed */
|
||||||
|
if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
|
||||||
|
{
|
||||||
|
tab->prune_trie = 1;
|
||||||
|
rt_schedule_prune(tab);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
log(L_BUG "Invalid arg to rt_unlock_trie()");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
rt_preconfig(struct config *c)
|
rt_preconfig(struct config *c)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue