From 886dd92eeefa070d8db6aaf0245a67f7a9e9b983 Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Wed, 22 Jul 2020 00:09:15 +0200
Subject: [PATCH] Slab: head now uses bitmask for used/free nodes info instead
 of lists

From now on, no auxiliary pointers are stored in the free slab nodes.
Those pointers used to cause strange debugging problems if a
use-after-free happened in slab-allocated structures, especially when
the structure's first member is a next pointer. This also saves one
pointer of memory per allocated object.

OTOH, we now rely on pages being aligned to a multiple of their size,
which is quite common anyway.
---
 configure.ac         |   1 +
 lib/bitops.h         |   1 +
 lib/resource.h       |   4 ++
 lib/slab.c           | 132 +++++++++++++++++++++----------------------
 sysdep/unix/Makefile |   2 +-
 sysdep/unix/alloc.c  |  80 ++++++++++++++++++++++++++
 6 files changed, 150 insertions(+), 70 deletions(-)
 create mode 100644 sysdep/unix/alloc.c

diff --git a/configure.ac b/configure.ac
index eabb3d56..7d92a5d4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -351,6 +351,7 @@ case $sysdesc in
 esac
 
 AC_CHECK_HEADERS_ONCE([alloca.h syslog.h])
+AC_CHECK_HEADER([sys/mman.h], [AC_DEFINE([HAVE_MMAP], [1], [Define to 1 if mmap() is available.])])
 AC_CHECK_MEMBERS([struct sockaddr.sa_len], [], [], [#include <sys/socket.h>])
 
 AC_C_BIGENDIAN(
diff --git a/lib/bitops.h b/lib/bitops.h
index a4c48a10..0beda2a3 100644
--- a/lib/bitops.h
+++ b/lib/bitops.h
@@ -28,6 +28,7 @@ u32 u32_log2(u32 v);
 static inline u32 u32_hash(u32 v) { return v * 2902958171u; }
 
 static inline u8 u32_popcount(u32 v) { return __builtin_popcount(v); }
+static inline u8 u64_popcount(u64 v) { return __builtin_popcountll(v); }
 
 static inline int u32_clz(u32 v) { return __builtin_clz(v); }
 static inline int u32_ctz(u32 v) { return __builtin_ctz(v); }
diff --git a/lib/resource.h b/lib/resource.h
index b56bcff5..48e62985 100644
--- a/lib/resource.h
+++ b/lib/resource.h
@@ -93,6 +93,10 @@
 void sl_free(slab *, void *);
 void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size);
 
+/* Allocator of whole pages; for use in slabs and other high-level allocators. */
+u64 get_page_size(void);
+void *alloc_page(void);
+void free_page(void *);
 
 #ifdef HAVE_LIBDMALLOC
 /*
diff --git a/lib/slab.c b/lib/slab.c
index f31355e0..b0a01ae7 100644
--- a/lib/slab.c
+++ b/lib/slab.c
@@ -4,6 +4,7 @@
  *	Heavily inspired by the original SLAB paper by Jeff Bonwick.
  *
  *	(c) 1998--2000 Martin Mares
+ *	(c) 2020 Maria Matejka
  *
  *	Can be freely distributed and used under the terms of the GNU GPL.
  */
@@ -147,12 +148,12 @@ slab_memsize(resource *r)
  *	Real efficient version.
  */
 
-#define SLAB_SIZE 4096
 #define MAX_EMPTY_HEADS 1
 
 struct slab {
   resource r;
-  uint obj_size, head_size, objs_per_slab, num_empty_heads, data_size;
+  uint obj_size, head_size, head_bitfield_len;
+  uint objs_per_slab, num_empty_heads, data_size;
   list empty_heads, partial_heads, full_heads;
 };
 
@@ -167,16 +168,8 @@ static struct resclass sl_class = {
 
 struct sl_head {
   node n;
-  struct sl_obj *first_free;
-  int num_full;
-};
-
-struct sl_obj {
-  struct sl_head *slab;
-  union {
-    struct sl_obj *next;
-    byte data[0];
-  } u;
+  u32 num_full;
+  u32 used_bits[0];
 };
 
 struct sl_alignment {		/* Magic structure for testing of alignment */
@@ -184,6 +177,8 @@ struct sl_alignment {		/* Magic structure for testing of alignment */
   int x[0];
 };
 
+#define SL_GET_HEAD(x) ((struct sl_head *) (((uintptr_t) (x)) & ~(get_page_size()-1)))
+
 /**
  * sl_new - create a new Slab
  * @p: resource pool
@@ -200,45 +195,32 @@ sl_new(pool *p, uint size)
   if (align < sizeof(int))
     align = sizeof(int);
   s->data_size = size;
-  size += OFFSETOF(struct sl_obj, u.data);
-  if (size < sizeof(struct sl_obj))
-    size = sizeof(struct sl_obj);
   size = (size + align - 1) / align * align;
   s->obj_size = size;
-  s->head_size = (sizeof(struct sl_head) + align - 1) / align * align;
-  s->objs_per_slab = (SLAB_SIZE - s->head_size) / size;
+
+  s->head_size = sizeof(struct sl_head);
+  u64 page_size = get_page_size();
+
+  do {
+    s->objs_per_slab = (page_size - s->head_size) / size;
+    s->head_bitfield_len = (s->objs_per_slab + 31) / 32;
+    s->head_size = (
+	sizeof(struct sl_head)
+	+ sizeof(u32) * s->head_bitfield_len
+	+ align - 1)
+      / align * align;
+  } while (s->objs_per_slab * size + s->head_size > page_size);
+
   if (!s->objs_per_slab)
     bug("Slab: object too large");
   s->num_empty_heads = 0;
+
   init_list(&s->empty_heads);
   init_list(&s->partial_heads);
   init_list(&s->full_heads);
   return s;
 }
 
-static struct sl_head *
-sl_new_head(slab *s)
-{
-  struct sl_head *h = xmalloc(SLAB_SIZE);
-  struct sl_obj *o = (struct sl_obj *)((byte *)h+s->head_size);
-  struct sl_obj *no;
-  uint n = s->objs_per_slab;
-
-  *h = (struct sl_head) {
-    .first_free = o,
-    .num_full = 0,
-  };
-
-  while (n--)
-    {
-      o->slab = h;
-      no = (struct sl_obj *)((char *) o+s->obj_size);
-      o->u.next = n ? no : NULL;
-      o = no;
-    }
-  return h;
-}
-
 /**
  * sl_alloc - allocate an object from Slab
  * @s: slab
@@ -250,24 +232,29 @@ void *
 sl_alloc(slab *s)
 {
   struct sl_head *h;
-  struct sl_obj *o;
 
 redo:
   h = HEAD(s->partial_heads);
   if (!h->n.next)
     goto no_partial;
 okay:
-  o = h->first_free;
-  if (!o)
-    goto full_partial;
-  h->first_free = o->u.next;
-  h->num_full++;
-#ifdef POISON
-  memset(o->u.data, 0xcd, s->data_size);
-#endif
-  return o->u.data;
+  for (uint i=0; i<s->head_bitfield_len; i++)
+    if (~h->used_bits[i])
+    {
+      uint pos = u32_ctz(~h->used_bits[i]);
+      if (i * 32 + pos >= s->objs_per_slab)
+	break;
+
+      h->used_bits[i] |= 1 << pos;
+      h->num_full++;
+
+      void *out = ((void *) h) + s->head_size + (i * 32 + pos) * s->obj_size;
+#ifdef POISON
+      memset(out, 0xcd, s->data_size);
+#endif
+      return out;
+    }
 
-full_partial:
   rem_node(&h->n);
   add_tail(&s->full_heads, &h->n);
   goto redo;
@@ -281,7 +268,9 @@ no_partial:
       s->num_empty_heads--;
       goto okay;
     }
-  h = sl_new_head(s);
+  h = alloc_page();
+  ASSERT_DIE(SL_GET_HEAD(h) == h);
+  memset(h, 0, s->head_size);
   add_head(&s->partial_heads, &h->n);
   goto okay;
 }
@@ -313,30 +302,35 @@ sl_allocz(slab *s)
 void
 sl_free(slab *s, void *oo)
 {
-  struct sl_obj *o = SKIP_BACK(struct sl_obj, u.data, oo);
-  struct sl_head *h = o->slab;
+  struct sl_head *h = SL_GET_HEAD(oo);
 
 #ifdef POISON
   memset(oo, 0xdb, s->data_size);
 #endif
-  o->u.next = h->first_free;
-  h->first_free = o;
-  if (!--h->num_full)
+
+  uint offset = oo - ((void *) h) - s->head_size;
+  ASSERT_DIE(offset % s->obj_size == 0);
+  uint pos = offset / s->obj_size;
+  ASSERT_DIE(pos < s->objs_per_slab);
+
+  h->used_bits[pos / 32] &= ~(1 << (pos % 32));
+
+  if (h->num_full-- == s->objs_per_slab)
+  {
+    rem_node(&h->n);
+    add_head(&s->partial_heads, &h->n);
+  }
+  else if (!h->num_full)
     {
       rem_node(&h->n);
       if (s->num_empty_heads >= MAX_EMPTY_HEADS)
-	xfree(h);
+	free_page(h);
       else
 	{
 	  add_head(&s->empty_heads, &h->n);
 	  s->num_empty_heads++;
 	}
     }
-  else if (!o->u.next)
-    {
-      rem_node(&h->n);
-      add_head(&s->partial_heads, &h->n);
-    }
 }
 
 static void
@@ -346,11 +340,11 @@ slab_free(resource *r)
   struct sl_head *h, *g;
 
   WALK_LIST_DELSAFE(h, g, s->empty_heads)
-    xfree(h);
+    free_page(h);
   WALK_LIST_DELSAFE(h, g, s->partial_heads)
-    xfree(h);
+    free_page(h);
   WALK_LIST_DELSAFE(h, g, s->full_heads)
-    xfree(h);
+    free_page(h);
 }
 
 static void
@@ -383,7 +377,7 @@ slab_memsize(resource *r)
   WALK_LIST(h, s->full_heads)
     heads++;
 
-  return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + SLAB_SIZE);
+  return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + get_page_size());
 }
 
 static resource *
@@ -393,10 +387,10 @@ slab_lookup(resource *r, unsigned long a)
   struct sl_head *h;
 
   WALK_LIST(h, s->partial_heads)
-    if ((unsigned long) h < a && (unsigned long) h + SLAB_SIZE < a)
+    if ((unsigned long) h < a && (unsigned long) h + get_page_size() < a)
      return r;
   WALK_LIST(h, s->full_heads)
-    if ((unsigned long) h < a && (unsigned long) h + SLAB_SIZE < a)
+    if ((unsigned long) h < a && (unsigned long) h + get_page_size() < a)
      return r;
   return NULL;
 }
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile
index f592399c..d0d36b5f 100644
--- a/sysdep/unix/Makefile
+++ b/sysdep/unix/Makefile
@@ -1,4 +1,4 @@
-src := io.c krt.c log.c main.c random.c
+src := alloc.c io.c krt.c log.c main.c random.c
 obj := $(src-o-files)
 $(all-daemon)
 $(cf-local)
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
new file mode 100644
index 00000000..c525f713
--- /dev/null
+++ b/sysdep/unix/alloc.c
@@ -0,0 +1,80 @@
+/*
+ *	BIRD Internet Routing Daemon -- Raw allocation
+ *
+ *	(c) 2020 Maria Matejka
+ *
+ *	Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "nest/bird.h"
+#include "lib/resource.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#endif
+
+#ifdef HAVE_MMAP
+static u64 page_size = 0;
+static _Bool use_fake = 0;
+#else
+static const u64 page_size = 4096;	/* Fake page size */
+#endif
+
+u64 get_page_size(void)
+{
+  if (page_size)
+    return page_size;
+
+#ifdef HAVE_MMAP
+  if (page_size = sysconf(_SC_PAGESIZE))
+  {
+    if ((u64_popcount(page_size) > 1) || (page_size > 16384))
+    {
+      /* Too big or strange page, use the aligned allocator instead */
+      page_size = 4096;
+      use_fake = 1;
+    }
+    return page_size;
+  }
+
+  bug("Page size must be non-zero");
+#endif
+}
+
+void *
+alloc_page(void)
+{
+#ifdef HAVE_MMAP
+  if (!use_fake)
+  {
+    void *ret = mmap(NULL, get_page_size(), PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (ret == MAP_FAILED)
+      bug("mmap(%lu) failed: %m", page_size);
+    return ret;
+  }
+  else
+#endif
+  {
+    void *ret = aligned_alloc(page_size, page_size);
+    if (!ret)
+      bug("aligned_alloc(%lu) failed", page_size);
+    return ret;
+  }
+}
+
+void
+free_page(void *ptr)
+{
+#ifdef HAVE_MMAP
+  if (!use_fake)
+  {
+    if (munmap(ptr, get_page_size()) < 0)
+      bug("munmap(%p) failed: %m", ptr);
+  }
+  else
+#endif
+    free(ptr);
+}
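
Illustration (not part of the patch): a minimal, self-contained sketch in C of the two tricks the new slab head relies on -- recovering the head from an object pointer by masking a page-aligned address (what SL_GET_HEAD() does) and tracking slot occupancy in a bitmask scanned with count-trailing-zeros (what sl_alloc() and sl_free() do with used_bits[]). PAGE, OBJ_SIZE, struct head and GET_HEAD are made-up placeholder names, and __builtin_ctz (GCC/Clang) stands in for BIRD's u32_ctz(); this is a sketch under those assumptions, not BIRD code.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE     4096u                 /* assumed page size, power of two */
#define OBJ_SIZE 64u                   /* assumed object size */

struct head {
  uint32_t used_bits[1];               /* one word is enough for this toy page */
};

/* A page-aligned page lets us find its head by masking off the low bits. */
#define GET_HEAD(p) ((struct head *) ((uintptr_t) (p) & ~(uintptr_t) (PAGE - 1)))

int main(void)
{
  struct head *h = aligned_alloc(PAGE, PAGE);   /* page-aligned "slab" */
  if (!h)
    return 1;
  memset(h, 0, sizeof *h);                      /* all slots start free */

  /* Allocate: the first zero bit in the mask is the first free slot. */
  unsigned pos = (unsigned) __builtin_ctz(~h->used_bits[0]);
  h->used_bits[0] |= 1u << pos;
  void *obj = (char *) h + sizeof(struct head) + pos * OBJ_SIZE;

  /* Free: head and slot index are recomputed from the pointer alone,
   * so no next-pointer ever has to be stored inside the freed object. */
  struct head *h2 = GET_HEAD(obj);
  unsigned slot = (unsigned) (((char *) obj - (char *) h2 - sizeof(struct head)) / OBJ_SIZE);
  h2->used_bits[slot / 32] &= ~(1u << (slot % 32));

  printf("head=%p obj=%p slot=%u same_head=%d\n", (void *) h, obj, slot, h == h2);
  free(h);
  return 0;
}

Because the bookkeeping lives entirely in the head's bitmask, a use-after-free can no longer corrupt the allocator's free chain through the object's first member, which is the debugging problem the commit message describes.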