From 7f0e59820899c30a243c18556ce2e3fb72d6d221 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 1 Sep 2021 00:46:46 +0200 Subject: [PATCH 01/11] Bound allocated pages to resource pools with page caches to avoid unnecessary syscalls --- lib/resource.c | 58 +++++++++++++++++++++++++++++++++++++++++++++ lib/resource.h | 8 ++++--- lib/slab.c | 24 ++++++++++--------- sysdep/unix/alloc.c | 37 +++++++++++++---------------- 4 files changed, 92 insertions(+), 35 deletions(-) diff --git a/lib/resource.c b/lib/resource.c index 4c4b92ec..0ad886d9 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -31,9 +31,18 @@ struct pool { resource r; list inside; + struct pool_pages *pages; const char *name; }; +struct pool_pages { + uint free; + uint used; + void *ptr[0]; +}; + +#define POOL_PAGES_MAX ((page_size - sizeof(struct pool_pages)) / sizeof (void *)) + static void pool_dump(resource *); static void pool_free(resource *); static resource *pool_lookup(resource *, unsigned long); @@ -50,6 +59,10 @@ static struct resclass pool_class = { pool root_pool; +void *alloc_sys_page(void); +void free_sys_page(void *); +void resource_sys_init(void); + static int indent; /** @@ -82,6 +95,14 @@ pool_free(resource *P) xfree(r); r = rr; } + + if (p->pages) + { + ASSERT_DIE(!p->pages->used); + for (uint i=0; ipages->free; i++) + free_sys_page(p->pages->ptr[i]); + free_sys_page(p->pages); + } } static void @@ -107,6 +128,9 @@ pool_memsize(resource *P) WALK_LIST(r, p->inside) sum += rmemsize(r); + if (p->pages) + sum += page_size * (p->pages->used + p->pages->free + 1); + return sum; } @@ -259,6 +283,7 @@ rlookup(unsigned long a) void resource_init(void) { + resource_sys_init(); root_pool.r.class = &pool_class; root_pool.name = "Root"; init_list(&root_pool.inside); @@ -425,6 +450,39 @@ mb_free(void *m) rfree(b); } +void * +alloc_page(pool *p) +{ + if (!p->pages) + { + p->pages = alloc_sys_page(); + p->pages->free = 0; + p->pages->used = 1; + } + else + p->pages->used++; + + if 
(p->pages->free) + { + void *ptr = p->pages->ptr[--p->pages->free]; + bzero(ptr, page_size); + return ptr; + } + else + return alloc_sys_page(); +} + +void +free_page(pool *p, void *ptr) +{ + ASSERT_DIE(p->pages); + p->pages->used--; + + if (p->pages->free >= POOL_PAGES_MAX) + return free_sys_page(ptr); + else + p->pages->ptr[p->pages->free++] = ptr; +} #define STEP_UP(x) ((x) + (x)/2 + 4) diff --git a/lib/resource.h b/lib/resource.h index e65455c8..597d6c17 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -94,10 +94,12 @@ void sl_free(slab *, void *); void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size); +extern long page_size; + /* Allocator of whole pages; for use in slabs and other high-level allocators. */ -u64 get_page_size(void); -void *alloc_page(void); -void free_page(void *); +void *alloc_page(pool *); +void free_page(pool *, void *); +#define PAGE_HEAD(x) ((void *) (((intptr_t) (x)) & ~(page_size-1))) #ifdef HAVE_LIBDMALLOC /* diff --git a/lib/slab.c b/lib/slab.c index 8d16c433..70aa776a 100644 --- a/lib/slab.c +++ b/lib/slab.c @@ -152,6 +152,7 @@ slab_memsize(resource *r) struct slab { resource r; + pool *p; uint obj_size, head_size, head_bitfield_len; uint objs_per_slab, num_empty_heads, data_size; list empty_heads, partial_heads, full_heads; @@ -191,6 +192,7 @@ slab * sl_new(pool *p, uint size) { slab *s = ralloc(p, &sl_class); + s->p = p; uint align = sizeof(struct sl_alignment); if (align < sizeof(int)) align = sizeof(int); @@ -199,7 +201,6 @@ sl_new(pool *p, uint size) s->obj_size = size; s->head_size = sizeof(struct sl_head); - u64 page_size = get_page_size(); do { s->objs_per_slab = (page_size - s->head_size) / size; @@ -268,9 +269,9 @@ no_partial: s->num_empty_heads--; goto okay; } - h = alloc_page(); + h = alloc_page(s->p); #ifdef POISON - memset(h, 0xba, get_page_size()); + memset(h, 0xba, page_size); #endif ASSERT_DIE(SL_GET_HEAD(h) == h); memset(h, 0, s->head_size); @@ -329,9 +330,9 @@ sl_free(slab *s, void 
*oo) if (s->num_empty_heads >= MAX_EMPTY_HEADS) { #ifdef POISON - memset(h, 0xde, get_page_size()); + memset(h, 0xde, page_size); #endif - free_page(h); + free_page(s->p, h); } else { @@ -348,11 +349,11 @@ slab_free(resource *r) struct sl_head *h, *g; WALK_LIST_DELSAFE(h, g, s->empty_heads) - free_page(h); + free_page(s->p, h); WALK_LIST_DELSAFE(h, g, s->partial_heads) - free_page(h); + free_page(s->p, h); WALK_LIST_DELSAFE(h, g, s->full_heads) - free_page(h); + free_page(s->p, h); } static void @@ -385,7 +386,8 @@ slab_memsize(resource *r) WALK_LIST(h, s->full_heads) heads++; - return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + get_page_size()); +// return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + page_size); + return ALLOC_OVERHEAD + sizeof(struct slab); /* The page sizes are accounted for in the pool */ } static resource * @@ -395,10 +397,10 @@ slab_lookup(resource *r, unsigned long a) struct sl_head *h; WALK_LIST(h, s->partial_heads) - if ((unsigned long) h < a && (unsigned long) h + get_page_size() < a) + if ((unsigned long) h < a && (unsigned long) h + page_size < a) return r; WALK_LIST(h, s->full_heads) - if ((unsigned long) h < a && (unsigned long) h + get_page_size() < a) + if ((unsigned long) h < a && (unsigned long) h + page_size < a) return r; return NULL; } diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index c525f713..f6296afe 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -16,41 +16,36 @@ #include #endif +long page_size = 0; + #ifdef HAVE_MMAP -static u64 page_size = 0; static _Bool use_fake = 0; #else -static const u64 page_size = 4096; /* Fake page size */ +static _Bool use_fake = 1; #endif -u64 get_page_size(void) +void resource_sys_init(void) { - if (page_size) - return page_size; - #ifdef HAVE_MMAP - if (page_size = sysconf(_SC_PAGESIZE)) - { - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) - { - /* Too big or strange page, use the aligned allocator instead */ - 
page_size = 4096; - use_fake = 1; - } - return page_size; - } + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); - bug("Page size must be non-zero"); + if ((u64_popcount(page_size) > 1) || (page_size > 16384)) + { #endif + /* Too big or strange page, use the aligned allocator instead */ + page_size = 4096; + use_fake = 1; + } } void * -alloc_page(void) +alloc_sys_page(void) { #ifdef HAVE_MMAP if (!use_fake) { - void *ret = mmap(NULL, get_page_size(), PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ret == MAP_FAILED) bug("mmap(%lu) failed: %m", page_size); return ret; @@ -66,12 +61,12 @@ alloc_page(void) } void -free_page(void *ptr) +free_sys_page(void *ptr) { #ifdef HAVE_MMAP if (!use_fake) { - if (munmap(ptr, get_page_size()) < 0) + if (munmap(ptr, page_size) < 0) bug("munmap(%p) failed: %m", ptr); } else From bea582cbb53e30dd32a5b6829c7443e0e5558d11 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 13 Oct 2021 18:59:45 +0200 Subject: [PATCH 02/11] fixup! 
Bound allocated pages to resource pools with page caches to avoid unnecessary syscalls --- lib/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/slab.c b/lib/slab.c index 70aa776a..2fc88ee0 100644 --- a/lib/slab.c +++ b/lib/slab.c @@ -178,7 +178,7 @@ struct sl_alignment { /* Magic structure for testing of alignment */ int x[0]; }; -#define SL_GET_HEAD(x) ((struct sl_head *) (((uintptr_t) (x)) & ~(get_page_size()-1))) +#define SL_GET_HEAD(x) ((struct sl_head *) (((uintptr_t) (x)) & ~(page_size-1))) /** * sl_new - create a new Slab From e5a8eec6d720408139d0a209ef149848c5dcfbe3 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Fri, 3 Sep 2021 19:48:38 +0200 Subject: [PATCH 03/11] Linpools may use pages instead of xmalloc --- lib/mempool.c | 21 ++++++++++++++++++--- lib/resource.h | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/mempool.c b/lib/mempool.c index 758882ce..8f300b81 100644 --- a/lib/mempool.c +++ b/lib/mempool.c @@ -37,9 +37,10 @@ const int lp_chunk_size = sizeof(struct lp_chunk); struct linpool { resource r; byte *ptr, *end; + pool *p; struct lp_chunk *first, *current; /* Normal (reusable) chunks */ struct lp_chunk *first_large; /* Large chunks */ - uint chunk_size, threshold, total, total_large; + uint chunk_size, threshold, total:31, use_pages:1, total_large; }; static void lp_free(resource *); @@ -69,6 +70,13 @@ linpool *lp_new(pool *p, uint blk) { linpool *m = ralloc(p, &lp_class); + m->p = p; + if (!blk) + { + m->use_pages = 1; + blk = page_size - lp_chunk_size; + } + m->chunk_size = blk; m->threshold = 3*blk/4; return m; @@ -121,7 +129,11 @@ lp_alloc(linpool *m, uint size) else { /* Need to allocate a new chunk */ - c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size); + if (m->use_pages) + c = alloc_page(m->p); + else + c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size); + m->total += m->chunk_size; c->next = NULL; c->size = m->chunk_size; @@ -258,7 +270,10 @@ lp_free(resource *r) 
for(d=m->first; d; d = c) { c = d->next; - xfree(d); + if (m->use_pages) + free_page(m->p, d); + else + xfree(d); } for(d=m->first_large; d; d = c) { diff --git a/lib/resource.h b/lib/resource.h index 597d6c17..26030aea 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -76,7 +76,7 @@ void lp_restore(linpool *m, lp_state *p); /* Restore state */ extern const int lp_chunk_size; #define LP_GAS 1024 #define LP_GOOD_SIZE(x) (((x + LP_GAS - 1) & (~(LP_GAS - 1))) - lp_chunk_size) -#define lp_new_default(p) lp_new(p, LP_GOOD_SIZE(LP_GAS*4)) +#define lp_new_default(p) lp_new(p, 0) /* Slabs */ From d322ee3d548a87d6e996dd20b2b415aad4b53f62 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Sun, 5 Sep 2021 13:00:08 +0200 Subject: [PATCH 04/11] OSPF: explicitly stop the periodic tick on shutdown to avoid recalculation races --- proto/ospf/ospf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index ba8c2e2b..ebebf0ff 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -558,6 +558,9 @@ ospf_shutdown(struct proto *P) } FIB_WALK_END; + if (tm_active(p->disp_timer)) + tm_stop(p->disp_timer); + return PS_DOWN; } From 3a31c3aad6c53ea9673743f983e13728d8551149 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 7 Sep 2021 16:22:32 +0000 Subject: [PATCH 05/11] CLI socket accept() may also fail and should produce some message, not a coredump. 
--- sysdep/unix/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 392aff9d..cdf0a310 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -479,6 +479,14 @@ cli_err(sock *s, int err) cli_free(s->data); } +static void +cli_connect_err(sock *s UNUSED, int err) +{ + ASSERT_DIE(err); + if (config->cli_debug) + log(L_INFO "Failed to accept CLI connection: %s", strerror(err)); +} + static int cli_connect(sock *s, uint size UNUSED) { @@ -507,6 +515,7 @@ cli_init_unix(uid_t use_uid, gid_t use_gid) s = cli_sk = sk_new(cli_pool); s->type = SK_UNIX_PASSIVE; s->rx_hook = cli_connect; + s->err_hook = cli_connect_err; s->rbsize = 1024; s->fast_rx = 1; From 6cd37713781a3092f8166b2178fae35cbfec1e28 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 8 Sep 2021 11:29:49 +0200 Subject: [PATCH 06/11] Multipage allocation We can also quite simply allocate bigger blocks. Anyway, we need these blocks to be aligned to their size which needs one mmap() two times bigger and then two munmap()s returning the unaligned parts. The user can specify -B <N> on startup where <N> is the exponent of 2, setting the block size to 2^N. On most systems, N is 12, anyway if you know that your configuration is going to eat gigabytes of RAM, you are almost forced to raise your block size as you may easily get into memory fragmentation issues or you have to raise your maximum mapping count, e.g. "sysctl vm.max_map_count=(number)". --- doc/bird.sgml | 7 +++++++ lib/resource.c | 2 -- lib/slab.c | 2 +- sysdep/unix/alloc.c | 23 +++++++++++++++++++++ sysdep/unix/main.c | 32 +++++++++++++++++++++++++++++++- 5 files changed, 62 insertions(+), 4 deletions(-) diff --git a/doc/bird.sgml b/doc/bird.sgml index 39dadaf2..ddad4d98 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -145,6 +145,13 @@ BIRD executable by configuring out routing protocols you don't use, and

You can pass several command-line options to bird: +

You can pass several command-line options to bird: -