Fixes responsiveness for protocol shutdown.

When a protocol went down, all its routes were flushed in one step,
which could block BIRD for too long. The patch fixes that by limiting
the maximum number of routes flushed in one step.
Ondrej Zajicek 2012-03-28 18:40:04 +02:00
parent cb3cf95859
commit fb829de690
4 changed files with 178 additions and 73 deletions
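Before the diffs, a minimal sketch (not part of this commit) of the bounded-work pattern the patch introduces: each pass discards at most a fixed number of stale routes and, if it runs out of budget, reschedules itself instead of pruning everything in one blocking call. The names MAX_WORK_PER_STEP, struct route, route_list, prune_step() and reschedule_event() are hypothetical stand-ins; the real code below does this per routing table in rt_prune_step()/rt_prune_loop() with a max_feed budget of 512, remembers its position with a FIB iterator (FIB_ITERATE_PUT) instead of rescanning from the head, and reschedules proto_flush_event from proto_flush_loop().

#include <stdlib.h>

#define MAX_WORK_PER_STEP 512           /* mirrors max_feed in the patch */

struct route {
  struct route *next;
  int stale;                            /* owning protocol has shut down */
};

static struct route *route_list;        /* routes not yet examined */

/* Placeholder for ev_schedule(proto_flush_event) in the real code. */
static void
reschedule_event(void)
{
}

/* One bounded pass; returns 1 when pruning is finished, 0 otherwise. */
static int
prune_step(void)
{
  int budget = MAX_WORK_PER_STEP;
  struct route **rp = &route_list;

  while (*rp)
  {
    struct route *r = *rp;

    if (!r->stale)
    {
      rp = &r->next;                    /* keep live routes */
      continue;
    }

    if (budget <= 0)
    {
      reschedule_event();               /* budget spent, continue later */
      return 0;
    }

    *rp = r->next;                      /* unlink and discard the stale route */
    free(r);
    budget--;
  }

  return 1;                             /* nothing stale left */
}

The trade-off is the usual cooperative-multitasking one: shutting down a protocol with a large table now takes several event rounds, but the event loop stays responsive in between.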


@@ -38,7 +38,7 @@ static event *proto_flush_event;
static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
static char *c_states[] = { "HUNGRY", "FEEDING", "HAPPY", "FLUSHING" };
static void proto_flush_all(void *);
static void proto_flush_loop(void *);
static void proto_rethink_goal(struct proto *p);
static char *proto_state_name(struct proto *p);
@@ -635,7 +635,7 @@ protos_build(void)
#endif
proto_pool = rp_new(&root_pool, "Protocols");
proto_flush_event = ev_new(proto_pool);
proto_flush_event->hook = proto_flush_all;
proto_flush_event->hook = proto_flush_loop;
}
static void
@@ -691,20 +691,6 @@ proto_feed_initial(void *P)
proto_feed_more(P);
}
static void
proto_schedule_flush(struct proto *p)
{
/* Need to abort feeding */
if (p->core_state == FS_FEEDING)
rt_feed_baby_abort(p);
DBG("%s: Scheduling flush\n", p->name);
p->core_state = FS_FLUSHING;
proto_relink(p);
proto_flush_hooks(p);
ev_schedule(proto_flush_event);
}
static void
proto_schedule_feed(struct proto *p, int initial)
{
@@ -721,6 +707,85 @@ proto_schedule_feed(struct proto *p, int initial)
ev_schedule(p->attn);
}
/*
* Flushing loop is responsible for flushing routes and protocols
* after they went down. It runs in proto_flush_event. At the start of
* one round, protocols waiting to flush are marked in
* proto_schedule_flush_loop(). At the end of the round (when routing
* table flush is complete), marked protocols are flushed and a next
* round may start.
*/
static int flush_loop_state; /* 1 -> running */
static void
proto_schedule_flush_loop(void)
{
struct proto *p;
if (flush_loop_state)
return;
flush_loop_state = 1;
rt_schedule_prune_all();
WALK_LIST(p, flush_proto_list)
p->flushing = 1;
ev_schedule(proto_flush_event);
}
static void
proto_flush_loop(void *unused UNUSED)
{
struct proto *p;
if (! rt_prune_loop())
{
/* Rtable pruning is not finished */
ev_schedule(proto_flush_event);
return;
}
again:
WALK_LIST(p, flush_proto_list)
if (p->flushing)
{
/* This will flush interfaces in the same manner
like rt_prune_all() flushes routes */
if (p->proto == &proto_unix_iface)
if_flush_ifaces(p);
DBG("Flushing protocol %s\n", p->name);
p->flushing = 0;
p->core_state = FS_HUNGRY;
proto_relink(p);
if (p->proto_state == PS_DOWN)
proto_fell_down(p);
goto again;
}
/* This round finished, perhaps there will be another one */
flush_loop_state = 0;
if (!EMPTY_LIST(flush_proto_list))
proto_schedule_flush_loop();
}
static void
proto_schedule_flush(struct proto *p)
{
/* Need to abort feeding */
if (p->core_state == FS_FEEDING)
rt_feed_baby_abort(p);
DBG("%s: Scheduling flush\n", p->name);
p->core_state = FS_FLUSHING;
proto_relink(p);
proto_flush_hooks(p);
proto_schedule_flush_loop();
}
/**
* proto_request_feeding - request feeding routes to the protocol
* @p: given protocol
@@ -810,27 +875,6 @@ proto_notify_state(struct proto *p, unsigned ps)
}
}
static void
proto_flush_all(void *unused UNUSED)
{
struct proto *p;
rt_prune_all();
while ((p = HEAD(flush_proto_list))->n.next)
{
/* This will flush interfaces in the same manner
like rt_prune_all() flushes routes */
if (p->proto == &proto_unix_iface)
if_flush_ifaces(p);
DBG("Flushing protocol %s\n", p->name);
p->core_state = FS_HUNGRY;
proto_relink(p);
if (p->proto_state == PS_DOWN)
proto_fell_down(p);
}
}
/*
* CLI Commands
*/


@@ -144,6 +144,7 @@ struct proto {
unsigned core_goal; /* State we want to reach (see below) */
unsigned reconfiguring; /* We're shutting down due to reconfiguration */
unsigned refeeding; /* We are refeeding (valid only if core_state == FS_FEEDING) */
unsigned flushing; /* Protocol is flushed in current flush loop round */
u32 hash_key; /* Random key used for hashing of neighbors */
bird_clock_t last_state_change; /* Time of last state transition */
char *last_state_name_announced; /* Last state name we've announced to the user */


@@ -139,8 +139,10 @@ typedef struct rtable {
int gc_counter; /* Number of operations since last GC */
bird_clock_t gc_time; /* Time of last GC */
byte gc_scheduled; /* GC is scheduled */
byte prune_state; /* Table prune state, 1 -> prune is running */
byte hcu_scheduled; /* Hostcache update is scheduled */
byte nhu_state; /* Next Hop Update state */
struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
} rtable;
@@ -244,7 +246,8 @@ void rt_dump(rtable *);
void rt_dump_all(void);
int rt_feed_baby(struct proto *p);
void rt_feed_baby_abort(struct proto *p);
void rt_prune_all(void);
void rt_schedule_prune_all(void);
int rt_prune_loop(void);
struct rtable_config *rt_new_table(struct symbol *s);
struct rt_show_data {


@@ -55,7 +55,6 @@ static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static void rt_prune(rtable *tab);
static inline void rt_schedule_gc(rtable *tab);
@@ -837,6 +836,38 @@ rt_schedule_nhu(rtable *tab)
tab->nhu_state |= 1;
}
static void
rt_prune_nets(rtable *tab)
{
struct fib_iterator fit;
int ncnt = 0, ndel = 0;
#ifdef DEBUGGING
fib_check(&tab->fib);
#endif
FIB_ITERATE_INIT(&fit, &tab->fib);
again:
FIB_ITERATE_START(&tab->fib, &fit, f)
{
net *n = (net *) f;
ncnt++;
if (!n->routes) /* Orphaned FIB entry */
{
FIB_ITERATE_PUT(&fit, f);
fib_delete(&tab->fib, f);
ndel++;
goto again;
}
}
FIB_ITERATE_END(f);
DBG("Pruned %d of %d networks\n", ndel, ncnt);
tab->gc_counter = 0;
tab->gc_time = now;
tab->gc_scheduled = 0;
}
static void
rt_event(void *ptr)
{
@@ -849,7 +880,7 @@ rt_event(void *ptr)
rt_next_hop_update(tab);
if (tab->gc_scheduled)
rt_prune(tab);
rt_prune_nets(tab);
}
void
@@ -885,70 +916,96 @@ rt_init(void)
init_list(&routing_tables);
}
/**
* rt_prune - prune a routing table
* @tab: routing table to be pruned
*
* This function is called whenever a protocol shuts down. It scans
* the routing table and removes all routes belonging to inactive
* protocols and also stale network entries.
*/
static void
rt_prune(rtable *tab)
/* Called from proto_schedule_flush_loop() only,
ensuring that all prune states are zero */
void
rt_schedule_prune_all(void)
{
struct fib_iterator fit;
int rcnt = 0, rdel = 0, ncnt = 0, ndel = 0;
rtable *t;
WALK_LIST(t, routing_tables)
t->prune_state = 1;
}
static inline int
rt_prune_step(rtable *tab, int *max_feed)
{
struct fib_iterator *fit = &tab->prune_fit;
DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
fib_check(&tab->fib);
#endif
FIB_ITERATE_INIT(&fit, &tab->fib);
again:
FIB_ITERATE_START(&tab->fib, &fit, f)
if (tab->prune_state == 0)
return 1;
if (tab->prune_state == 1)
{
net *n = (net *) f;
FIB_ITERATE_INIT(fit, &tab->fib);
tab->prune_state = 2;
}
again:
FIB_ITERATE_START(&tab->fib, fit, fn)
{
net *n = (net *) fn;
rte *e;
ncnt++;
rescan:
for (e=n->routes; e; e=e->next, rcnt++)
for (e=n->routes; e; e=e->next)
if (e->sender->core_state != FS_HAPPY &&
e->sender->core_state != FS_FEEDING)
{
if (*max_feed <= 0)
{
FIB_ITERATE_PUT(fit, fn);
return 0;
}
rte_discard(tab, e);
rdel++;
(*max_feed)--;
goto rescan;
}
if (!n->routes) /* Orphaned FIB entry? */
if (!n->routes) /* Orphaned FIB entry */
{
FIB_ITERATE_PUT(&fit, f);
fib_delete(&tab->fib, f);
ndel++;
FIB_ITERATE_PUT(fit, fn);
fib_delete(&tab->fib, fn);
goto again;
}
}
FIB_ITERATE_END(f);
DBG("Pruned %d of %d routes and %d of %d networks\n", rdel, rcnt, ndel, ncnt);
FIB_ITERATE_END(fn);
#ifdef DEBUGGING
fib_check(&tab->fib);
#endif
tab->gc_counter = 0;
tab->gc_time = now;
tab->gc_scheduled = 0;
tab->prune_state = 0;
return 1;
}
/**
* rt_prune_all - prune all routing tables
* rt_prune_loop - prune routing tables
* @tab: routing table to be pruned
*
* This function calls rt_prune() for all known routing tables.
* The prune loop scans routing tables and removes routes belonging to
* inactive protocols and also stale network entries. Returns 1 when
* all such routes are pruned. It is a part of the protocol flushing
* loop.
*/
void
rt_prune_all(void)
int
rt_prune_loop(void)
{
rtable *t;
int max_feed = 512;
WALK_LIST(t, routing_tables)
rt_prune(t);
if (! rt_prune_step(t, &max_feed))
return 0;
return 1;
}
void