From 9d03c3f56ced3d3191982f57029f9a3d12fa2e5a Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 1 Nov 2022 18:40:56 +0100 Subject: [PATCH] Memory pages are not munmapped, instead we just madvise() Memory unmapping causes slow address space fragmentation, leading in extreme cases to a failure to allocate pages at all. We remove this problem by keeping all the pages allocated to us, yet calling madvise() to let the kernel dispose of them. This adds a little complexity and overhead, as we have to keep the pointers to the free pages: to hold e.g. 1 GB of 4K pages with 8B pointers, we have to store 2 MB of data. --- sysdep/cf/linux.h | 1 + sysdep/unix/alloc.c | 50 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h index c640bef4..56ecf017 100644 --- a/sysdep/cf/linux.h +++ b/sysdep/cf/linux.h @@ -20,6 +20,7 @@ #define CONFIG_RESTRICTED_PRIVILEGES #define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h" +#define CONFIG_MADV_DONTNEED_TO_FREE #ifndef AF_MPLS #define AF_MPLS 28 diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index edad6209..2800a8ba 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -41,8 +41,17 @@ struct free_page { }; #endif +#define EP_POS_MAX ((page_size - OFFSETOF(struct empty_pages, pages)) / sizeof (void *)) + +struct empty_pages { + node n; + uint pos; + void *pages[0]; +}; + struct free_pages { list pages; + list empty; u16 min, max; /* Minimal and maximal number of free pages kept */ uint cnt; /* Number of empty pages */ event cleanup; @@ -103,6 +112,16 @@ alloc_page(void) return fp; } + if (!EMPTY_LIST(fps->empty)) + { + struct empty_pages *ep = HEAD(fps->empty); + if (ep->pos) + return ep->pages[--ep->pos]; + + rem_node(&ep->n); + return ep; + } + return alloc_sys_page(); #endif } @@ -145,18 +164,36 @@ global_free_pages_cleanup_event(void *data UNUSED) fps->cnt++; } - for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && 
(fps->cnt > fps->max / 2); seen++) + int limit = CLEANUP_PAGES_BULK; + while (--limit && (fps->cnt > fps->max / 2)) { struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); rem_node(&fp->n); + fps->cnt--; - if (munmap(fp, page_size) == 0) - fps->cnt--; - else if (errno == ENOMEM) - add_head(&fps->pages, &fp->n); + struct empty_pages *ep; + if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX)) + { + ep = (struct empty_pages *) fp; + *ep = (struct empty_pages) {}; + add_head(&fps->empty, &ep->n); + } else - bug("munmap(%p) failed: %m", fp); + { + ep->pages[ep->pos++] = fp; + if (madvise(fp, page_size, +#ifdef CONFIG_MADV_DONTNEED_TO_FREE + MADV_DONTNEED +#else + MADV_FREE +#endif + ) < 0) + bug("madvise(%p) failed: %m", fp); + } } + + if (!limit) + ev_schedule(&fps->cleanup); } #endif @@ -174,6 +211,7 @@ resource_sys_init(void) struct free_pages *fps = &global_free_pages; init_list(&fps->pages); + init_list(&fps->empty); global_free_pages_cleanup_event(NULL); return; }