Memory pages are not munmapped; instead we just madvise()

Memory unmapping causes slow address space fragmentation, in extreme cases
leading to a failure to allocate pages at all. We remove this problem by
keeping all the pages allocated to us and calling madvise() to let the
kernel dispose of their contents.

This adds a little complexity and overhead as we have to keep the
pointers to the free pages, therefore to hold e.g. 1 GB of 4K pages with
8B pointers, we have to store 2 MB of data.
This commit is contained in:
Maria Matejka 2022-11-01 18:40:56 +01:00
parent 37b6444137
commit 9d03c3f56c
2 changed files with 45 additions and 6 deletions

View file

@ -20,6 +20,7 @@
#define CONFIG_RESTRICTED_PRIVILEGES #define CONFIG_RESTRICTED_PRIVILEGES
#define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h" #define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h"
#define CONFIG_MADV_DONTNEED_TO_FREE
#ifndef AF_MPLS #ifndef AF_MPLS
#define AF_MPLS 28 #define AF_MPLS 28

View file

@ -41,8 +41,17 @@ struct free_page {
}; };
#endif #endif
/* Number of page pointers that fit into one empty_pages index page:
 * whole page minus the empty_pages header, divided by pointer size. */
#define EP_POS_MAX ((page_size - OFFSETOF(struct empty_pages, pages)) / sizeof (void *))
/* A free page repurposed as an index of other free pages whose contents
 * have been given back to the kernel via madvise(). Linked into the
 * free_pages.empty list; holds up to EP_POS_MAX page pointers. */
struct empty_pages {
  node n;		/* Node in the free_pages.empty list */
  uint pos;		/* Number of valid entries in pages[] */
  void *pages[];	/* Pointers to the kept (madvised) pages;
			   C99 flexible array member instead of the
			   GNU zero-length-array extension [0] */
};
struct free_pages { struct free_pages {
list pages; list pages;
list empty;
u16 min, max; /* Minimal and maximal number of free pages kept */ u16 min, max; /* Minimal and maximal number of free pages kept */
uint cnt; /* Number of empty pages */ uint cnt; /* Number of empty pages */
event cleanup; event cleanup;
@ -103,6 +112,16 @@ alloc_page(void)
return fp; return fp;
} }
if (!EMPTY_LIST(fps->empty))
{
struct empty_pages *ep = HEAD(fps->empty);
if (ep->pos)
return ep->pages[--ep->pos];
rem_node(&ep->n);
return ep;
}
return alloc_sys_page(); return alloc_sys_page();
#endif #endif
} }
@ -145,18 +164,36 @@ global_free_pages_cleanup_event(void *data UNUSED)
fps->cnt++; fps->cnt++;
} }
for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) int limit = CLEANUP_PAGES_BULK;
while (--limit && (fps->cnt > fps->max / 2))
{ {
struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
rem_node(&fp->n); rem_node(&fp->n);
fps->cnt--;
if (munmap(fp, page_size) == 0) struct empty_pages *ep;
fps->cnt--; if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX))
else if (errno == ENOMEM) {
add_head(&fps->pages, &fp->n); ep = (struct empty_pages *) fp;
*ep = (struct empty_pages) {};
add_head(&fps->empty, &ep->n);
}
else else
bug("munmap(%p) failed: %m", fp); {
ep->pages[ep->pos++] = fp;
if (madvise(fp, page_size,
#ifdef CONFIG_MADV_DONTNEED_TO_FREE
MADV_DONTNEED
#else
MADV_FREE
#endif
) < 0)
bug("madvise(%p) failed: %m", fp);
}
} }
if (!limit)
ev_schedule(&fps->cleanup);
} }
#endif #endif
@ -174,6 +211,7 @@ resource_sys_init(void)
struct free_pages *fps = &global_free_pages; struct free_pages *fps = &global_free_pages;
init_list(&fps->pages); init_list(&fps->pages);
init_list(&fps->empty);
global_free_pages_cleanup_event(NULL); global_free_pages_cleanup_event(NULL);
return; return;
} }