diff mbox

[01/15] coroutine-ucontext: mmap stack memory

Message ID 1467104499-27517-2-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven June 28, 2016, 9:01 a.m. UTC
coroutine-ucontext currently allocates stack memory from heap as on most systems the
stack size lays below the threshold for mmapping memory. This patch forces mmaping
of stacks to avoid large holes on the heap when a coroutine is deleted. It additionally
allows us for adding a guard page at the bottom of the stack to avoid overflows.

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Lieven <pl@kamp.de>
---
 util/coroutine-ucontext.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

Comments

Peter Maydell June 28, 2016, 10:02 a.m. UTC | #1
On 28 June 2016 at 10:01, Peter Lieven <pl@kamp.de> wrote:
> coroutine-ucontext currently allocates stack memory from heap as on most systems the
> stack size lays below the threshold for mmapping memory. This patch forces mmaping
> of stacks to avoid large holes on the heap when a coroutine is deleted. It additionally
> allows us for adding a guard page at the bottom of the stack to avoid overflows.
>
> Suggested-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  util/coroutine-ucontext.c | 26 +++++++++++++++++++++++---
>  1 file changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
> index 2bb7e10..841e7db 100644
> --- a/util/coroutine-ucontext.c
> +++ b/util/coroutine-ucontext.c
> @@ -80,9 +80,10 @@ static void coroutine_trampoline(int i0, int i1)
>      }
>  }
>
> +#define COROUTINE_STACK_SIZE (1 << 20)
> +
>  Coroutine *qemu_coroutine_new(void)
>  {
> -    const size_t stack_size = 1 << 20;
>      CoroutineUContext *co;
>      ucontext_t old_uc, uc;
>      sigjmp_buf old_env;
> @@ -101,17 +102,32 @@ Coroutine *qemu_coroutine_new(void)
>      }
>
>      co = g_malloc0(sizeof(*co));
> +
> +#ifdef MAP_GROWSDOWN
> +    co->stack = mmap(NULL, COROUTINE_STACK_SIZE, PROT_READ | PROT_WRITE,
> +                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0);
> +    if (co->stack == MAP_FAILED) {
> +        abort();
> +    }
> +    /* add a guard page at bottom of the stack */
> +    if (mmap(co->stack, getpagesize(), PROT_NONE,
> +        MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0) == MAP_FAILED) {
> +        abort();
> +    }
> +#else
>      co->stack = g_malloc(stack_size);

I would just mmap() always; then we get the benefit of the
guard page even if there's no MAP_GROWSDOWN.

Also, does MAP_GROWSDOWN help with the RSS issues? I
noticed that glibc itself doesn't use it for pthread
stacks as far as I can tell, so maybe it's obsolete?
(Ulrich Drepper apparently thought so in 2008:
https://lwn.net/Articles/294001/ )

> +#endif

Can we abstract this out into an alloc/dealloc function, please?

/**
 * qemu_alloc_stack:
 * @sz: size of required stack in bytes
 *
 * Allocate memory that can be used as a stack, for instance for
 * coroutines. If the memory cannot be allocated, this function
 * will abort (like g_malloc()). The allocated stack should be
 * freed with qemu_free_stack().
 *
 * Returns: pointer to (the lowest address of) the stack memory.
 */
void *qemu_alloc_stack(size_t sz);

/**
 * qemu_free_stack:
 * @stack: stack to free
 *
 * Free a stack allocated via qemu_alloc_stack().
 */
void qemu_free_stack(void *stack);

util/coroutine-sigaltstack.c can then use the same function
for stack allocation.

I would put the implementation in util/oslib-posix.c and the
header in include/sysemu/os-posix.h, unless somebody has a
better idea.

> +
>      co->base.entry_arg = &old_env; /* stash away our jmp_buf */
>
>      uc.uc_link = &old_uc;
>      uc.uc_stack.ss_sp = co->stack;
> -    uc.uc_stack.ss_size = stack_size;
> +    uc.uc_stack.ss_size = COROUTINE_STACK_SIZE;

Because of the guard page, your code above isn't actually
allocating this much stack.

>      uc.uc_stack.ss_flags = 0;
>
>  #ifdef CONFIG_VALGRIND_H
>      co->valgrind_stack_id =
> -        VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
> +        VALGRIND_STACK_REGISTER(co->stack, co->stack + COROUTINE_STACK_SIZE);
>  #endif
>
>      arg.p = co;
> @@ -149,7 +165,11 @@ void qemu_coroutine_delete(Coroutine *co_)
>      valgrind_stack_deregister(co);
>  #endif
>
> +#ifdef MAP_GROWSDOWN
> +    munmap(co->stack, COROUTINE_STACK_SIZE);
> +#else
>      g_free(co->stack);
> +#endif
>      g_free(co);
>  }
>
> --
> 1.9.1

thanks
-- PMM
Peter Lieven June 28, 2016, 10:21 a.m. UTC | #2
Am 28.06.2016 um 12:02 schrieb Peter Maydell:
> On 28 June 2016 at 10:01, Peter Lieven <pl@kamp.de> wrote:
>> coroutine-ucontext currently allocates stack memory from heap as on most systems the
>> stack size lays below the threshold for mmapping memory. This patch forces mmaping
>> of stacks to avoid large holes on the heap when a coroutine is deleted. It additionally
>> allows us for adding a guard page at the bottom of the stack to avoid overflows.
>>
>> Suggested-by: Peter Maydell <peter.maydell@linaro.org>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   util/coroutine-ucontext.c | 26 +++++++++++++++++++++++---
>>   1 file changed, 23 insertions(+), 3 deletions(-)
>>
>> diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
>> index 2bb7e10..841e7db 100644
>> --- a/util/coroutine-ucontext.c
>> +++ b/util/coroutine-ucontext.c
>> @@ -80,9 +80,10 @@ static void coroutine_trampoline(int i0, int i1)
>>       }
>>   }
>>
>> +#define COROUTINE_STACK_SIZE (1 << 20)
>> +
>>   Coroutine *qemu_coroutine_new(void)
>>   {
>> -    const size_t stack_size = 1 << 20;
>>       CoroutineUContext *co;
>>       ucontext_t old_uc, uc;
>>       sigjmp_buf old_env;
>> @@ -101,17 +102,32 @@ Coroutine *qemu_coroutine_new(void)
>>       }
>>
>>       co = g_malloc0(sizeof(*co));
>> +
>> +#ifdef MAP_GROWSDOWN
>> +    co->stack = mmap(NULL, COROUTINE_STACK_SIZE, PROT_READ | PROT_WRITE,
>> +                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0);
>> +    if (co->stack == MAP_FAILED) {
>> +        abort();
>> +    }
>> +    /* add a guard page at bottom of the stack */
>> +    if (mmap(co->stack, getpagesize(), PROT_NONE,
>> +        MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0) == MAP_FAILED) {
>> +        abort();
>> +    }
>> +#else
>>       co->stack = g_malloc(stack_size);
> I would just mmap() always; then we get the benefit of the
> guard page even if there's no MAP_GROWSDOWN.
>
> Also, does MAP_GROWSDOWN help with the RSS issues? I
> noticed that glibc itself doesn't use it for pthread
> stacks as far as I can tell, so maybe it's obsolete?
> (Ulrich Drepper apparently thought so in 2008:
> https://lwn.net/Articles/294001/ )

I have seen this thread. The MAP_GROWSDOWN does not help
at all as far as it seems. Only reducing the stack size does.

>
>> +#endif
> Can we abstract this out into an alloc/dealloc function, please?
>
> /**
>   * qemu_alloc_stack:
>   * @sz: size of required stack in bytes
>   *
>   * Allocate memory that can be used as a stack, for instance for
>   * coroutines. If the memory cannot be allocated, this function
>   * will abort (like g_malloc()). The allocated stack should be
>   * freed with qemu_free_stack().
>   *
>   * Returns: pointer to (the lowest address of) the stack memory.
>   */
> void *qemu_alloc_stack(size_t sz);
>
> /**
>   * qemu_free_stack:
>   * @stack: stack to free
>   *
>   * Free a stack allocated via qemu_alloc_stack().
>   */
> void qemu_free_stack(void *stack);

we need to pass the size also for munmap.

>
> util/coroutine-sigaltstack.c can then use the same function
> for stack allocation.
>
> I would put the implementation in util/oslib-posix.c and the
> header in include/sysemu/os-posix.h, unless somebody has a
> better idea.
>
>> +
>>       co->base.entry_arg = &old_env; /* stash away our jmp_buf */
>>
>>       uc.uc_link = &old_uc;
>>       uc.uc_stack.ss_sp = co->stack;
>> -    uc.uc_stack.ss_size = stack_size;
>> +    uc.uc_stack.ss_size = COROUTINE_STACK_SIZE;
> Because of the guard page, your code above isn't actually
> allocating this much stack.

oh yes, you are right.

Peter
Paolo Bonzini June 28, 2016, 11:04 a.m. UTC | #3
On 28/06/2016 11:01, Peter Lieven wrote:
> +#ifdef MAP_GROWSDOWN
> +    co->stack = mmap(NULL, COROUTINE_STACK_SIZE, PROT_READ | PROT_WRITE,
> +                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0);
> +    if (co->stack == MAP_FAILED) {
> +        abort();
> +    }
> +    /* add a guard page at bottom of the stack */
> +    if (mmap(co->stack, getpagesize(), PROT_NONE,
> +        MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0) == MAP_FAILED) {
> +        abort();
> +    }

Nevermind, you added a guard page!  Good! :)  And actually it looks like
the stack usage has been mostly tamed, at least for the block layer.

On the other hand MAP_GROWSDOWN automatically adds a guard page since
Linux 3.9 (see commit 09884964335e, "mm: do not grow the stack vma just
because of an overrun on preceding vma", 2013-02-27), so as it turns out
you don't even need the guard page!

Paolo

> +#else
>      co->stack = g_malloc(stack_size);
> +#endif
diff mbox

Patch

diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
index 2bb7e10..841e7db 100644
--- a/util/coroutine-ucontext.c
+++ b/util/coroutine-ucontext.c
@@ -80,9 +80,10 @@  static void coroutine_trampoline(int i0, int i1)
     }
 }
 
+#define COROUTINE_STACK_SIZE (1 << 20)
+
 Coroutine *qemu_coroutine_new(void)
 {
-    const size_t stack_size = 1 << 20;
     CoroutineUContext *co;
     ucontext_t old_uc, uc;
     sigjmp_buf old_env;
@@ -101,17 +102,32 @@  Coroutine *qemu_coroutine_new(void)
     }
 
     co = g_malloc0(sizeof(*co));
+
+#ifdef MAP_GROWSDOWN
+    co->stack = mmap(NULL, COROUTINE_STACK_SIZE, PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0);
+    if (co->stack == MAP_FAILED) {
+        abort();
+    }
+    /* add a guard page at bottom of the stack */
+    if (mmap(co->stack, getpagesize(), PROT_NONE,
+        MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0) == MAP_FAILED) {
+        abort();
+    }
+#else
     co->stack = g_malloc(stack_size);
+#endif
+
     co->base.entry_arg = &old_env; /* stash away our jmp_buf */
 
     uc.uc_link = &old_uc;
     uc.uc_stack.ss_sp = co->stack;
-    uc.uc_stack.ss_size = stack_size;
+    uc.uc_stack.ss_size = COROUTINE_STACK_SIZE;
     uc.uc_stack.ss_flags = 0;
 
 #ifdef CONFIG_VALGRIND_H
     co->valgrind_stack_id =
-        VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+        VALGRIND_STACK_REGISTER(co->stack, co->stack + COROUTINE_STACK_SIZE);
 #endif
 
     arg.p = co;
@@ -149,7 +165,11 @@  void qemu_coroutine_delete(Coroutine *co_)
     valgrind_stack_deregister(co);
 #endif
 
+#ifdef MAP_GROWSDOWN
+    munmap(co->stack, COROUTINE_STACK_SIZE);
+#else
     g_free(co->stack);
+#endif
     g_free(co);
 }