diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index 2800a8ba..e7c4e6b0 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -50,10 +50,10 @@ struct empty_pages {
 };
 
 struct free_pages {
-  list pages;
-  list empty;
+  list pages; /* List of (struct free_page) keeping free pages without releasing them (hot) */
+  list empty; /* List of (struct empty_pages) keeping invalidated pages mapped for us (cold) */
   u16 min, max; /* Minimal and maximal number of free pages kept */
-  uint cnt; /* Number of empty pages */
+  uint cnt; /* Number of (hot) free pages in the pages list */
   event cleanup;
 };
 
@@ -87,6 +87,7 @@ extern int shutting_down; /* Shutdown requested. */
 void *
 alloc_page(void)
 {
+  /* If the fake allocator is in use (use_fake), we use posix_memalign to get aligned blocks of memory. */
   if (use_fake)
   {
     void *ptr = NULL;
@@ -101,27 +102,34 @@ alloc_page(void)
 #ifdef HAVE_MMAP
   struct free_pages *fps = &global_free_pages;
 
+  /* If there is any free page kept hot, we use it. */
   if (fps->cnt)
   {
     struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages));
     rem_node(&fp->n);
+
+    /* If the hot-free-page cache is getting short, request the cleanup routine to replenish the cache. */
     if ((--fps->cnt < fps->min) && !shutting_down)
       ev_schedule(&fps->cleanup);
 
-    bzero(fp, page_size);
     return fp;
   }
 
+  /* If there is any free page kept cold, we use that. */
   if (!EMPTY_LIST(fps->empty))
   {
     struct empty_pages *ep = HEAD(fps->empty);
+
+    /* Either the keeper page holds at least one cold page pointer; return the last one stored */
     if (ep->pos)
       return ep->pages[--ep->pos];
 
+    /* Or the keeper page has no cold page pointers left; return the keeper page itself */
     rem_node(&ep->n);
     return ep;
   }
 
+  /* And in the worst case, allocate a new page by mmap() */
   return alloc_sys_page();
 #endif
 }
@@ -129,6 +137,7 @@ alloc_page(void)
 void
 free_page(void *ptr)
 {
+  /* If the fake allocator is in use (use_fake), we just free the block and care no more. */
   if (use_fake)
   {
     free(ptr);
@@ -139,9 +148,11 @@ free_page(void *ptr)
   struct free_pages *fps = &global_free_pages;
   struct free_page *fp = ptr;
 
+  /* Otherwise, we add the free page to the tail of the hot-free-page list */
   fp->n = (node) {};
   add_tail(&fps->pages, &fp->n);
 
+  /* And if there are too many hot free pages, we ask for page cleanup */
   if ((++fps->cnt > fps->max) && !shutting_down)
     ev_schedule(&fps->cleanup);
 #endif
@@ -151,11 +162,13 @@ free_page(void *ptr)
 static void
 global_free_pages_cleanup_event(void *data UNUSED)
 {
+  /* Cleanup on shutdown is ignored. All pages may be kept hot, OS will take care. */
   if (shutting_down)
     return;
 
   struct free_pages *fps = &global_free_pages;
 
+  /* Cleanup may get called when the hot free page cache is short of pages; replenish it. */
   while (fps->cnt / 2 < fps->min)
   {
     struct free_page *fp = alloc_sys_page();
@@ -164,22 +177,25 @@ global_free_pages_cleanup_event(void *data UNUSED)
     fps->cnt++;
   }
 
-  int limit = CLEANUP_PAGES_BULK;
-  while (--limit && (fps->cnt > fps->max / 2))
+  /* Or the hot free page cache is too big; move at most CLEANUP_PAGES_BULK pages to the cold free page cache. */
+  for (int limit = CLEANUP_PAGES_BULK; limit && (fps->cnt > fps->max / 2); fps->cnt--, limit--)
   {
     struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
     rem_node(&fp->n);
-    fps->cnt--;
 
+    /* Empty pages are stored as pointers. To store them, we need a pointer block. */
     struct empty_pages *ep;
     if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX))
     {
+      /* There is either no pointer block or the head one is full. We use this page as a new pointer block. */
       ep = (struct empty_pages *) fp;
       *ep = (struct empty_pages) {};
       add_head(&fps->empty, &ep->n);
     }
     else
     {
+      /* We store a pointer to this page into the first free slot
+       * and tell the OS that the underlying memory is trash. */
       ep->pages[ep->pos++] = fp;
       if (madvise(fp, page_size,
 #ifdef CONFIG_MADV_DONTNEED_TO_FREE
@@ -192,7 +208,9 @@ global_free_pages_cleanup_event(void *data UNUSED)
     }
   }
 
-  if (!limit)
+  /* If there are still too many hot free pages after this bulk, re-schedule
+   * this routine; stopping here allows other routines to run in between. */
+  if (fps->cnt > fps->max)
     ev_schedule(&fps->cleanup);
 }
 #endif
@@ -203,11 +221,15 @@ resource_sys_init(void)
 #ifdef HAVE_MMAP
   ASSERT_DIE(global_free_pages.cnt == 0);
 
+  /* Check what page size the system supports */
   if (!(page_size = sysconf(_SC_PAGESIZE)))
     die("System page size must be non-zero");
 
-  if (u64_popcount(page_size) == 1)
+  if ((u64_popcount(page_size) == 1) && (page_size >= (1 << 10)) && (page_size <= (1 << 18)))
   {
+    /* We require the page size to be a power of two between 1K and 256K (incl.).
+     * Otherwise, the assumptions in lib/slab.c (sl_head's num_full range) aren't met. */
+
     struct free_pages *fps = &global_free_pages;
 
     init_list(&fps->pages);
@@ -217,7 +239,7 @@ resource_sys_init(void)
   }
 
   /* Too big or strange page, use the aligned allocator instead */
-  log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size);
+  log(L_WARN "Got strange memory page size (%ld), using the aligned allocator instead", (s64) page_size);
   use_fake = 1;
 #endif
 