From 3ab001b97402bc01a4777cf1cb60ce940d50ffe3 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 20 Dec 1998 14:01:20 +0000 Subject: [PATCH] Rewrote fib functions to make them insert/delete/asynchronous-walk safe. This is implemented in a way similar to lib/slists.h, but it took some more effort to make rehashing not disturb the readers. We do it by just taking _highest_ k bits of ipa_hash as our hash value and sorting each box by whole ipa_hash(). Consult FIB_ITERATE_* macros in nest/route.h. Implemented fib_check() debugging function and also rewrote the rehashing algorithm to use better thresholds and not to waste time by rehashing forth and back. --- nest/route.h | 46 ++++++- nest/rt-fib.c | 339 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 349 insertions(+), 36 deletions(-) diff --git a/nest/route.h b/nest/route.h index d21a32c8..da0f1cd2 100644 --- a/nest/route.h +++ b/nest/route.h @@ -17,21 +17,31 @@ struct proto; /* * Generic data structure for storing network prefixes. Also used - * for the master routing table. Currently implemented as a hash + * for the master routing table. Currently implemented as a hash * table. * * Available operations: * - insertion of new entry * - deletion of entry * - searching for entry by network prefix + * - asynchronous retrieval of fib contents */ struct fib_node { - ip_addr prefix; /* In host order */ + struct fib_node *next; /* Next in hash chain */ + struct fib_iterator *readers; /* List of readers of this node */ byte pxlen; byte flags; /* User-defined */ byte x0, x1; /* User-defined */ - struct fib_node *next; /* Next in hash chain */ + ip_addr prefix; /* In host order */ +}; + +struct fib_iterator { /* See lib/slists.h for an explanation */ + struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */ + struct fib_node *node; /* Or NULL if freshly merged */ + byte efef; /* 0xff to distinguish between iterator and node */ + byte pad[3]; + unsigned int hash; }; struct fib { @@ -39,17 +49,23 @@ struct fib { slab *fib_slab; /* Slab holding all fib nodes */ struct fib_node **hash_table; /* Node hash table */ unsigned int hash_size; /* Number of hash table entries (a power of two) */ - unsigned int hash_mask; /* hash_size - 1 */ + unsigned int hash_order; /* Binary logarithm of hash_size */ + unsigned int hash_shift; /* 16 - hash_log */ unsigned int entries; /* Number of entries */ unsigned int entries_min, entries_max;/* Entry count limits (else start rehashing) */ void (*init)(struct fib_node *); /* Constructor */ }; -void fib_init(struct fib *, pool *, unsigned node_size, unsigned hash_size, void (*init)(struct fib_node *)); +void fib_init(struct fib *, pool *, unsigned node_size, unsigned hash_order, void (*init)(struct fib_node *)); void *fib_find(struct fib *, ip_addr *, int); /* Find or return NULL if doesn't exist */ void *fib_get(struct fib *, ip_addr *, int); /* Find or create new if nonexistent */ void fib_delete(struct fib *, void *); /* Remove fib entry */ void fib_free(struct fib *); /* Destroy the fib */ +void fib_check(struct fib *); /* Consistency check for debugging */ + +void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */ +struct fib_node *fit_get(struct fib *, struct fib_iterator *); +void fit_put(struct fib_iterator *, struct fib_node *); #define FIB_WALK(fib, z) do { \ struct fib_node *z, **ff = (fib)->hash_table; \ @@ -59,6 +75,24 @@ void fib_free(struct fib *); /* Destroy the fib */ #define FIB_WALK_END } while (0) +#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) + +#define FIB_ITERATE_START(fib, it, z) do { \ + struct fib_node *z = fit_get(fib, it); \ + unsigned int count = (fib)->hash_size; \ + unsigned int hpos = (it)->hash; \ + for(;;) { \ + again: if (!z) { \ + if (++hpos >= count) \ + break; \ + z = (fib)->hash_table[hpos]; \ + goto again; \ + } + +#define FIB_ITERATE_END z = z->next; } } while(0) + +#define FIB_ITERATE_PUT(it, z) fit_put(it, z) + /* * Master Routing Tables. Generally speaking, each of them is a list * of FIB (one per TOS) with each entry pointing to a list of route entries @@ -76,7 +110,7 @@ typedef struct rtable { } rtable; typedef struct network { - struct fib_node n; /* FIB flags,x0 reserved for kernel syncer */ + struct fib_node n; /* FIB flags reserved for kernel syncer */ struct rte *routes; /* Available routes for this network */ } net; diff --git a/nest/rt-fib.c b/nest/rt-fib.c index 3840712b..15db74c4 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -13,24 +13,30 @@ #include "nest/bird.h" #include "nest/route.h" -#define HASH_DEF_SIZE 1024 -#define HASH_HI_MARK *16 -#define HASH_LO_MARK *0 -#define HASH_LO_MIN 128 -#define HASH_HI_RESIZE *16 -#define HASH_LO_RESIZE *0 +#define HASH_DEF_ORDER 10 +#define HASH_HI_MARK *4 +#define HASH_HI_STEP 2 +#define HASH_HI_MAX 16 /* Must be at most 16 */ +#define HASH_LO_MARK /5 +#define HASH_LO_STEP 2 +#define HASH_LO_MIN 10 static void fib_ht_alloc(struct fib *f) { - f->hash_mask = f->hash_size - 1; - f->entries_max = f->hash_size HASH_HI_MARK; - f->entries_min = f->hash_size HASH_LO_MARK; - if (f->entries_min < HASH_LO_MIN) + f->hash_size = 1 << f->hash_order; + f->hash_shift = 16 - f->hash_order; + if (f->hash_order > HASH_HI_MAX - HASH_HI_STEP) + f->entries_max = ~0; + else + f->entries_max = f->hash_size HASH_HI_MARK; + if (f->hash_order < HASH_LO_MIN + HASH_LO_STEP) f->entries_min = 0; - DBG("Allocating FIB: %d entries, %d low, %d high\n", f->hash_size, f->entries_min, f->entries_max); + else + f->entries_min = f->hash_size HASH_LO_MARK; + DBG("Allocating FIB hash of order %d: %d entries, %d low, %d high\n", + f->hash_order, f->hash_size, f->entries_min, f->entries_max); f->hash_table = mb_alloc(f->fib_pool, f->hash_size * sizeof(struct fib_node *)); - bzero(f->hash_table, f->hash_size * sizeof(struct fib_node *)); } static inline void @@ -42,46 +48,64 @@ fib_ht_free(struct fib_node **h) static inline unsigned fib_hash(struct fib *f, ip_addr *a) { - return ipa_hash(*a) & f->hash_mask; + return ipa_hash(*a) >> f->hash_shift; } void -fib_init(struct fib *f, pool *p, unsigned node_size, unsigned hash_size, void (*init)(struct fib_node *)) +fib_init(struct fib *f, pool *p, unsigned node_size, unsigned hash_order, void (*init)(struct fib_node *)) { - if (!hash_size) - hash_size = HASH_DEF_SIZE; + if (!hash_order) + hash_order = HASH_DEF_ORDER; f->fib_pool = p; f->fib_slab = sl_new(p, node_size); - f->hash_size = hash_size; + f->hash_order = hash_order; fib_ht_alloc(f); + bzero(f->hash_table, f->hash_size * sizeof(struct fib_node *)); f->entries = 0; f->entries_min = 0; f->init = init; } static void -fib_rehash(struct fib *f, unsigned new) +fib_rehash(struct fib *f, int step) { - unsigned old; + unsigned old, new, oldn, newn, ni, nh; struct fib_node **n, *e, *x, **t, **m, **h; - old = f->hash_size; + old = f->hash_order; + oldn = f->hash_size; + new = old + step; m = h = f->hash_table; - DBG("Re-hashing FIB from %d to %d\n", old, new); - f->hash_size = new; + DBG("Re-hashing FIB from order %d to %d\n", old, new); + f->hash_order = new; fib_ht_alloc(f); - n = f->hash_table; + t = n = f->hash_table; + newn = f->hash_size; + ni = 0; + while (old--) { x = *h++; while (e = x) { x = e->next; - t = n + fib_hash(f, &e->prefix); - e->next = *t; + nh = fib_hash(f, &e->prefix); + while (nh > ni) + { + *t = NULL; + ni++; + t = ++n; + } *t = e; + t = &e->next; } } + while (ni < newn) + { + *t = NULL; + ni++; + t = ++n; + } fib_ht_free(m); } @@ -98,8 +122,9 @@ fib_find(struct fib *f, ip_addr *a, int len) void * fib_get(struct fib *f, ip_addr *a, int len) { - struct fib_node **ee = f->hash_table + fib_hash(f, a); - struct fib_node *e = *ee; + unsigned int h = ipa_hash(*a); + struct fib_node **ee = f->hash_table + (h >> f->hash_shift); + struct fib_node *g, *e = *ee; while (e && (e->pxlen != len || !ipa_equal(*a, e->prefix))) e = e->next; @@ -112,28 +137,80 @@ fib_get(struct fib *f, ip_addr *a, int len) e = sl_alloc(f->fib_slab); e->prefix = *a; e->pxlen = len; + while ((g = *ee) && ipa_hash(g->prefix) < h) + ee = &g->next; e->next = *ee; *ee = e; + e->readers = NULL; f->init(e); if (f->entries++ > f->entries_max) - fib_rehash(f, f->hash_size HASH_HI_RESIZE); + fib_rehash(f, HASH_HI_STEP); return e; } +static inline void +fib_merge_readers(struct fib_iterator *i, struct fib_node *to) +{ + if (to) + { + struct fib_iterator *j = to->readers; + if (!j) + { + /* Fast path */ + to->readers = i; + i->prev = (struct fib_iterator *) to; + fixup: + while (i && i->node) + { + i->node = NULL; + i = i->next; + } + return; + } + /* Really merging */ + while (j->next) + j = j->next; + j->next = i; + i->prev = j; + goto fixup; + } + else /* No more nodes */ + while (i) + { + i->prev = NULL; + i = i->next; + } +} + void fib_delete(struct fib *f, void *E) { struct fib_node *e = E; - struct fib_node **ee = f->hash_table + fib_hash(f, &e->prefix); + unsigned int h = fib_hash(f, &e->prefix); + struct fib_node **ee = f->hash_table + h; + struct fib_iterator *it; while (*ee) { if (*ee == e) { *ee = e->next; + if (it = e->readers) + { + struct fib_node *l = e->next; + while (!l) + { + h++; + if (h >= f->hash_size) + break; + else + l = f->hash_table[h]; + } + fib_merge_readers(it, l); + } sl_free(f->fib_slab, e); if (f->entries-- < f->entries_min) - fib_rehash(f, f->hash_size HASH_LO_RESIZE); + fib_rehash(f, -HASH_LO_STEP); return; } ee = &((*ee)->next); @@ -147,3 +224,205 @@ fib_free(struct fib *f) fib_ht_free(f->hash_table); rfree(f->fib_slab); } + +void +fit_init(struct fib_iterator *i, struct fib *f) +{ + unsigned h; + struct fib_node *n; + + i->efef = 0xff; + for(h=0; hhash_size; h++) + if (n = f->hash_table[h]) + { + i->prev = (struct fib_iterator *) n; + if (i->next = n->readers) + i->next->prev = i; + n->readers = i; + i->node = n; + return; + } + /* The fib is empty, nothing to do */ + i->prev = i->next = NULL; + i->node = NULL; +} + +struct fib_node * +fit_get(struct fib *f, struct fib_iterator *i) +{ + struct fib_node *n; + struct fib_iterator *j, *k; + + if (!i->prev) + { + /* We are at the end */ + i->hash = f->hash_size; + return NULL; + } + if (!(n = i->node)) + { + /* No node info available, we are a victim of merging. Try harder. */ + j = i; + while (j->efef == 0xff) + j = j->prev; + n = (struct fib_node *) j; + } + j = i->prev; + if (k = i->next) + k->prev = j; + j->next = k; + i->hash = fib_hash(f, &n->prefix); + return n; +} + +void +fit_put(struct fib_iterator *i, struct fib_node *n) +{ + struct fib_iterator *j; + + i->node = n; + if (j = n->readers) + j->prev = i; + i->next = j; + n->readers = i; + i->prev = (struct fib_iterator *) n; +} + +#ifdef DEBUGGING + +void +fib_check(struct fib *f) +{ + unsigned int i, ec, lo, nulls; + + ec = 0; + for(i=0; ihash_size; i++) + { + struct fib_node *n; + lo = 0; + for(n=f->hash_table[i]; n; n=n->next) + { + struct fib_iterator *j, *j0; + unsigned int h0 = ipa_hash(n->prefix); + if (h0 < lo) + die("fib_check: discord in hash chains"); + lo = h0; + if ((h0 >> f->hash_shift) != i) + die("fib_check: mishashed %x->%x (order %d)", h0, i, f->hash_order); + j0 = (struct fib_iterator *) n; + nulls = 0; + for(j=n->readers; j; j=j->next) + { + if (j->prev != j0) + die("fib_check: iterator->prev mismatch"); + j0 = j; + if (!j->node) + nulls++; + else if (nulls) + die("fib_check: iterator nullified"); + else if (j->node != n) + die("fib_check: iterator->node mismatch"); + } + ec++; + } + } + if (ec != f->entries) + die("fib_check: invalid entry count (%d != %d)", ec, f->entries); +} + +#endif + +#ifdef TEST + +#include "lib/resource.h" + +struct fib f; + +void dump(char *m) +{ + unsigned int i; + + debug("%s ... order=%d, size=%d, entries=%d\n", m, f.hash_order, f.hash_size, f.hash_size); + for(i=0; inext) + { + debug("%04x %04x %p %I/%2d", i, ipa_hash(n->prefix), n, n->prefix, n->pxlen); + for(j=n->readers; j; j=j->next) + debug(" %p[%p]", j, j->node); + debug("\n"); + } + } + fib_check(&f); + debug("-----\n"); +} + +void init(struct fib_node *n) +{ +} + +int main(void) +{ + struct fib_node *n; + struct fib_iterator i, j; + ip_addr a; + int c; + + log_init_debug(NULL); + resource_init(); + fib_init(&f, &root_pool, sizeof(struct fib_node), 4, init); + dump("init"); + + a = ipa_from_u32(0x01020304); n = fib_get(&f, &a, 32); + a = ipa_from_u32(0x02030405); n = fib_get(&f, &a, 32); + a = ipa_from_u32(0x03040506); n = fib_get(&f, &a, 32); + a = ipa_from_u32(0x00000000); n = fib_get(&f, &a, 32); + a = ipa_from_u32(0x00000c01); n = fib_get(&f, &a, 32); + a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32); + dump("fill"); + + fit_init(&i, &f); + dump("iter init"); + + fib_rehash(&f, 1); + dump("rehash up"); + + fib_rehash(&f, -1); + dump("rehash down"); + +next: + c = 0; + FIB_ITERATE_START(&f, &i, z) + { + if (c) + { + FIB_ITERATE_PUT(&i, z); + dump("iter"); + goto next; + } + c = 1; + debug("got %p\n", z); + } + FIB_ITERATE_END; + dump("iter end"); + + fit_init(&i, &f); + fit_init(&j, &f); + dump("iter init 2"); + + n = fit_get(&f, &i); + dump("iter step 2"); + + fit_put(&i, n->next); + dump("iter step 3"); + + a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32); + fib_delete(&f, n); + dump("iter step 3"); + + return 0; +} + +#endif