linux-user: Add an argument QEMU_MMAP_BASE to set custom mmap base address in qemu user mode
diff mbox series

Message ID 20200222010925.32858-1-yuanzi@google.com
State New
Headers show
Series
  • linux-user: Add an argument QEMU_MMAP_BASE to set custom mmap base address in qemu user mode
Related show

Commit Message

Lirong Yuan Feb. 22, 2020, 1:09 a.m. UTC
This change allows us to set custom base address for guest programs. It is needed to allow qemu to work with Thread Sanitizer (TSan), which has specific boundary definitions for memory mappings on different platforms:
https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h

Signed-off-by: Lirong Yuan <yuanzi@google.com>
---
 linux-user/main.c | 12 ++++++++++++
 linux-user/mmap.c |  3 ++-
 linux-user/qemu.h |  5 +++++
 3 files changed, 19 insertions(+), 1 deletion(-)

Comments

Lirong Yuan Feb. 29, 2020, 12:43 a.m. UTC | #1
On Fri, Feb 21, 2020 at 5:09 PM Lirong Yuan <yuanzi@google.com> wrote:
>
> This change allows us to set custom base address for guest programs. It is needed to allow qemu to work with Thread Sanitizer (TSan), which has specific boundary definitions for memory mappings on different platforms:
> https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h
>
> Signed-off-by: Lirong Yuan <yuanzi@google.com>
> ---
>  linux-user/main.c | 12 ++++++++++++
>  linux-user/mmap.c |  3 ++-
>  linux-user/qemu.h |  5 +++++
>  3 files changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/linux-user/main.c b/linux-user/main.c
> index fba833aac9..c01af6bfee 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -336,6 +336,16 @@ static void handle_arg_guest_base(const char *arg)
>      have_guest_base = 1;
>  }
>
> +static void handle_arg_mmap_base(const char *arg)
> +{
> +    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
> +    if (err) {
> +        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n", arg, err);
> +        exit(EXIT_FAILURE);
> +    }
> +    mmap_next_start = mmap_base;
> +}
> +
>  static void handle_arg_reserved_va(const char *arg)
>  {
>      char *p;
> @@ -440,6 +450,8 @@ static const struct qemu_argument arg_table[] = {
>       "uname",      "set qemu uname release string to 'uname'"},
>      {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
>       "address",    "set guest_base address to 'address'"},
> +    {"mmap_base",  "QEMU_MMAP_BASE",   true,  handle_arg_mmap_base,
> +     "",           "begin allocating guest pages at this host address"},
>      {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
>       "size",       "reserve 'size' bytes for guest virtual address space"},
>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> index 8685f02e7e..3f35543acf 100644
> --- a/linux-user/mmap.c
> +++ b/linux-user/mmap.c
> @@ -189,6 +189,7 @@ static int mmap_frag(abi_ulong real_start,
>  # define TASK_UNMAPPED_BASE  0x40000000
>  #endif
>  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
> +abi_ulong mmap_base = TASK_UNMAPPED_BASE;
>
>  unsigned long last_brk;
>
> @@ -299,7 +300,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
>
>              if ((addr & (align - 1)) == 0) {
>                  /* Success.  */
> -                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
> +                if (start == mmap_next_start && addr >= mmap_base) {
>                      mmap_next_start = addr + size;
>                  }
>                  return addr;
> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> index 560a68090e..83c00cfea2 100644
> --- a/linux-user/qemu.h
> +++ b/linux-user/qemu.h
> @@ -161,6 +161,11 @@ void task_settid(TaskState *);
>  void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
>  extern unsigned long mmap_min_addr;
> +/*
> + * mmap_base is minimum address to use when allocating guest pages. All guest
> + * pages will be allocated at this (guest) address or higher addresses.
> + */
> +extern abi_ulong mmap_base;
>
>  /* ??? See if we can avoid exposing so much of the loader internals.  */
>
> --
> 2.25.0.265.gbab2e86ba0-goog
>

Friendly ping~

Link to the page for the patch on patchwork:
http://patchwork.ozlabs.org/patch/1242370/
Laurent Vivier March 2, 2020, 2:56 p.m. UTC | #2
Le 29/02/2020 à 01:43, Lirong Yuan a écrit :
> On Fri, Feb 21, 2020 at 5:09 PM Lirong Yuan <yuanzi@google.com> wrote:
>>
>> This change allows us to set custom base address for guest programs. It is needed to allow qemu to work with Thread Sanitizer (TSan), which has specific boundary definitions for memory mappings on different platforms:
>> https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h

Could you give more details and some examples?

Thanks,
Laurent

>> Signed-off-by: Lirong Yuan <yuanzi@google.com>
>> ---
>>  linux-user/main.c | 12 ++++++++++++
>>  linux-user/mmap.c |  3 ++-
>>  linux-user/qemu.h |  5 +++++
>>  3 files changed, 19 insertions(+), 1 deletion(-)
>>
>> diff --git a/linux-user/main.c b/linux-user/main.c
>> index fba833aac9..c01af6bfee 100644
>> --- a/linux-user/main.c
>> +++ b/linux-user/main.c
>> @@ -336,6 +336,16 @@ static void handle_arg_guest_base(const char *arg)
>>      have_guest_base = 1;
>>  }
>>
>> +static void handle_arg_mmap_base(const char *arg)
>> +{
>> +    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
>> +    if (err) {
>> +        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n", arg, err);
>> +        exit(EXIT_FAILURE);
>> +    }
>> +    mmap_next_start = mmap_base;
>> +}
>> +
>>  static void handle_arg_reserved_va(const char *arg)
>>  {
>>      char *p;
>> @@ -440,6 +450,8 @@ static const struct qemu_argument arg_table[] = {
>>       "uname",      "set qemu uname release string to 'uname'"},
>>      {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
>>       "address",    "set guest_base address to 'address'"},
>> +    {"mmap_base",  "QEMU_MMAP_BASE",   true,  handle_arg_mmap_base,
>> +     "",           "begin allocating guest pages at this host address"},
>>      {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
>>       "size",       "reserve 'size' bytes for guest virtual address space"},
>>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
>> index 8685f02e7e..3f35543acf 100644
>> --- a/linux-user/mmap.c
>> +++ b/linux-user/mmap.c
>> @@ -189,6 +189,7 @@ static int mmap_frag(abi_ulong real_start,
>>  # define TASK_UNMAPPED_BASE  0x40000000
>>  #endif
>>  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
>> +abi_ulong mmap_base = TASK_UNMAPPED_BASE;
>>
>>  unsigned long last_brk;
>>
>> @@ -299,7 +300,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
>>
>>              if ((addr & (align - 1)) == 0) {
>>                  /* Success.  */
>> -                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
>> +                if (start == mmap_next_start && addr >= mmap_base) {
>>                      mmap_next_start = addr + size;
>>                  }
>>                  return addr;
>> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
>> index 560a68090e..83c00cfea2 100644
>> --- a/linux-user/qemu.h
>> +++ b/linux-user/qemu.h
>> @@ -161,6 +161,11 @@ void task_settid(TaskState *);
>>  void stop_all_tasks(void);
>>  extern const char *qemu_uname_release;
>>  extern unsigned long mmap_min_addr;
>> +/*
>> + * mmap_base is minimum address to use when allocating guest pages. All guest
>> + * pages will be allocated at this (guest) address or higher addresses.
>> + */
>> +extern abi_ulong mmap_base;
>>
>>  /* ??? See if we can avoid exposing so much of the loader internals.  */
>>
>> --
>> 2.25.0.265.gbab2e86ba0-goog
>>
> 
> Friendly ping~
> 
> Link to the page for the patch on patchwork:
> http://patchwork.ozlabs.org/patch/1242370/
>
Lirong Yuan March 2, 2020, 5:53 p.m. UTC | #3
On Mon, Mar 2, 2020 at 6:56 AM Laurent Vivier <laurent@vivier.eu> wrote:
>
> Le 29/02/2020 à 01:43, Lirong Yuan a écrit :
> > On Fri, Feb 21, 2020 at 5:09 PM Lirong Yuan <yuanzi@google.com> wrote:
> >>
> >> This change allows us to set custom base address for guest programs. It is needed to allow qemu to work with Thread Sanitizer (TSan), which has specific boundary definitions for memory mappings on different platforms:
> >> https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h
>
> Could you give more details and some examples?
>
> Thanks,
> Laurent
>
> >> Signed-off-by: Lirong Yuan <yuanzi@google.com>
> >> ---
> >>  linux-user/main.c | 12 ++++++++++++
> >>  linux-user/mmap.c |  3 ++-
> >>  linux-user/qemu.h |  5 +++++
> >>  3 files changed, 19 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/linux-user/main.c b/linux-user/main.c
> >> index fba833aac9..c01af6bfee 100644
> >> --- a/linux-user/main.c
> >> +++ b/linux-user/main.c
> >> @@ -336,6 +336,16 @@ static void handle_arg_guest_base(const char *arg)
> >>      have_guest_base = 1;
> >>  }
> >>
> >> +static void handle_arg_mmap_base(const char *arg)
> >> +{
> >> +    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
> >> +    if (err) {
> >> +        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n", arg, err);
> >> +        exit(EXIT_FAILURE);
> >> +    }
> >> +    mmap_next_start = mmap_base;
> >> +}
> >> +
> >>  static void handle_arg_reserved_va(const char *arg)
> >>  {
> >>      char *p;
> >> @@ -440,6 +450,8 @@ static const struct qemu_argument arg_table[] = {
> >>       "uname",      "set qemu uname release string to 'uname'"},
> >>      {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
> >>       "address",    "set guest_base address to 'address'"},
> >> +    {"mmap_base",  "QEMU_MMAP_BASE",   true,  handle_arg_mmap_base,
> >> +     "",           "begin allocating guest pages at this host address"},
> >>      {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
> >>       "size",       "reserve 'size' bytes for guest virtual address space"},
> >>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
> >> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> >> index 8685f02e7e..3f35543acf 100644
> >> --- a/linux-user/mmap.c
> >> +++ b/linux-user/mmap.c
> >> @@ -189,6 +189,7 @@ static int mmap_frag(abi_ulong real_start,
> >>  # define TASK_UNMAPPED_BASE  0x40000000
> >>  #endif
> >>  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
> >> +abi_ulong mmap_base = TASK_UNMAPPED_BASE;
> >>
> >>  unsigned long last_brk;
> >>
> >> @@ -299,7 +300,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
> >>
> >>              if ((addr & (align - 1)) == 0) {
> >>                  /* Success.  */
> >> -                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
> >> +                if (start == mmap_next_start && addr >= mmap_base) {
> >>                      mmap_next_start = addr + size;
> >>                  }
> >>                  return addr;
> >> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> >> index 560a68090e..83c00cfea2 100644
> >> --- a/linux-user/qemu.h
> >> +++ b/linux-user/qemu.h
> >> @@ -161,6 +161,11 @@ void task_settid(TaskState *);
> >>  void stop_all_tasks(void);
> >>  extern const char *qemu_uname_release;
> >>  extern unsigned long mmap_min_addr;
> >> +/*
> >> + * mmap_base is minimum address to use when allocating guest pages. All guest
> >> + * pages will be allocated at this (guest) address or higher addresses.
> >> + */
> >> +extern abi_ulong mmap_base;
> >>
> >>  /* ??? See if we can avoid exposing so much of the loader internals.  */
> >>
> >> --
> >> 2.25.0.265.gbab2e86ba0-goog
> >>
> >
> > Friendly ping~
> >
> > Link to the page for the patch on patchwork:
> > http://patchwork.ozlabs.org/patch/1242370/
> >
>

Hi Laurent,

Sure! We tried to run a program with TSAN enabled
(https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual)
in qemu, and got this error message:
"FATAL: ThreadSanitizer: unexpected memory mapping
0x004000000000-0x004000253000"

The root cause is that the default guest base address that qemu uses
is 0x4000000000 (1ul<<38), and does not align with TSAN's expectation:
https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/mmap.c#L187
https://github.com/llvm/llvm-project/blob/e7de00cf974a4e30d4900518ae8473a117efbd6c/compiler-rt/lib/tsan/rtl/tsan_platform.h#L150

By setting QEMU_GUEST_BASE, we can place the guest program at a
different base address in the host program. However, the h2g function
(in |open_self_maps| in syscall.c) translates the address back to be
based at 0x4000000000. E.g. the base address
0x4000000000+QEMU_GUEST_BASE will be converted to 0x4000000000 with
function h2g:
https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/syscall.c#L7076

One solution then, is to update |open_self_maps| in syscall.c to not
use h2g. However this changes the meaning of QEMU_GUEST_BASE and could
break existing programs that set non-zero QEMU_GUEST_BASE.

So, how did qemu pick the base address 0x4000000000 then? Looking at
the blame output in github, one recent change for the base address was
committed 10 years ago:
https://github.com/qemu/qemu/c|open_self_maps| in
syscall.commit/14f24e1465edc44b9b4d89fbbea66e06088154e1

Another one was committed 12 years ago:
https://github.com/qemu/qemu/commit/a03e2d421e7f33316750d6b7396d1a7e14b18d53

The description of the first change is "place the default mapping base
for 64-bit guests (on 64-bit hosts) outside the low 4G". It would seem
that minimum requirements for the base address are:
1) addr >= 4G (for 64-bit)
2) addr < lowest address used by the host qemu program by some margin

Given that
1) only TSAN explicitly check for the validity of addresses
2) 0x4000000000 is not a valid address for programs on aarch64
(according to TSAN)
3) different architectures have different valid addresses,
it would seem that adding an argument for changing the hard-coded base
address is a viable solution.

Thanks!
Laurent Vivier March 2, 2020, 6:38 p.m. UTC | #4
Le 02/03/2020 à 18:53, Lirong Yuan a écrit :
> On Mon, Mar 2, 2020 at 6:56 AM Laurent Vivier <laurent@vivier.eu> wrote:
>>
>> Le 29/02/2020 à 01:43, Lirong Yuan a écrit :
>>> On Fri, Feb 21, 2020 at 5:09 PM Lirong Yuan <yuanzi@google.com> wrote:
>>>>
>>>> This change allows us to set custom base address for guest programs. It is needed to allow qemu to work with Thread Sanitizer (TSan), which has specific boundary definitions for memory mappings on different platforms:
>>>> https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h
>>
>> Could you give more details and some examples?
>>
>> Thanks,
>> Laurent
>>
>>>> Signed-off-by: Lirong Yuan <yuanzi@google.com>
>>>> ---
>>>>  linux-user/main.c | 12 ++++++++++++
>>>>  linux-user/mmap.c |  3 ++-
>>>>  linux-user/qemu.h |  5 +++++
>>>>  3 files changed, 19 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/linux-user/main.c b/linux-user/main.c
>>>> index fba833aac9..c01af6bfee 100644
>>>> --- a/linux-user/main.c
>>>> +++ b/linux-user/main.c
>>>> @@ -336,6 +336,16 @@ static void handle_arg_guest_base(const char *arg)
>>>>      have_guest_base = 1;
>>>>  }
>>>>
>>>> +static void handle_arg_mmap_base(const char *arg)
>>>> +{
>>>> +    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
>>>> +    if (err) {
>>>> +        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n", arg, err);
>>>> +        exit(EXIT_FAILURE);
>>>> +    }
>>>> +    mmap_next_start = mmap_base;
>>>> +}
>>>> +
>>>>  static void handle_arg_reserved_va(const char *arg)
>>>>  {
>>>>      char *p;
>>>> @@ -440,6 +450,8 @@ static const struct qemu_argument arg_table[] = {
>>>>       "uname",      "set qemu uname release string to 'uname'"},
>>>>      {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
>>>>       "address",    "set guest_base address to 'address'"},
>>>> +    {"mmap_base",  "QEMU_MMAP_BASE",   true,  handle_arg_mmap_base,
>>>> +     "",           "begin allocating guest pages at this host address"},
>>>>      {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
>>>>       "size",       "reserve 'size' bytes for guest virtual address space"},
>>>>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
>>>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
>>>> index 8685f02e7e..3f35543acf 100644
>>>> --- a/linux-user/mmap.c
>>>> +++ b/linux-user/mmap.c
>>>> @@ -189,6 +189,7 @@ static int mmap_frag(abi_ulong real_start,
>>>>  # define TASK_UNMAPPED_BASE  0x40000000
>>>>  #endif
>>>>  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
>>>> +abi_ulong mmap_base = TASK_UNMAPPED_BASE;
>>>>
>>>>  unsigned long last_brk;
>>>>
>>>> @@ -299,7 +300,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
>>>>
>>>>              if ((addr & (align - 1)) == 0) {
>>>>                  /* Success.  */
>>>> -                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
>>>> +                if (start == mmap_next_start && addr >= mmap_base) {
>>>>                      mmap_next_start = addr + size;
>>>>                  }
>>>>                  return addr;
>>>> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
>>>> index 560a68090e..83c00cfea2 100644
>>>> --- a/linux-user/qemu.h
>>>> +++ b/linux-user/qemu.h
>>>> @@ -161,6 +161,11 @@ void task_settid(TaskState *);
>>>>  void stop_all_tasks(void);
>>>>  extern const char *qemu_uname_release;
>>>>  extern unsigned long mmap_min_addr;
>>>> +/*
>>>> + * mmap_base is minimum address to use when allocating guest pages. All guest
>>>> + * pages will be allocated at this (guest) address or higher addresses.
>>>> + */
>>>> +extern abi_ulong mmap_base;
>>>>
>>>>  /* ??? See if we can avoid exposing so much of the loader internals.  */
>>>>
>>>> --
>>>> 2.25.0.265.gbab2e86ba0-goog
>>>>
>>>
>>> Friendly ping~
>>>
>>> Link to the page for the patch on patchwork:
>>> http://patchwork.ozlabs.org/patch/1242370/
>>>
>>
> 
> Hi Laurent,
> 
> Sure! We tried to run a program with TSAN enabled
> (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual)
> in qemu, and got this error message:
> "FATAL: ThreadSanitizer: unexpected memory mapping
> 0x004000000000-0x004000253000"
> 
> The root cause is that the default guest base address that qemu uses
> is 0x4000000000 (1ul<<38), and does not align with TSAN's expectation:
> https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/mmap.c#L187
> https://github.com/llvm/llvm-project/blob/e7de00cf974a4e30d4900518ae8473a117efbd6c/compiler-rt/lib/tsan/rtl/tsan_platform.h#L150
> 
> By setting QEMU_GUEST_BASE, we can place the guest program at a
> different base address in the host program. However, the h2g function
> (in |open_self_maps| in syscall.c) translates the address back to be
> based at 0x4000000000. E.g. the base address
> 0x4000000000+QEMU_GUEST_BASE will be converted to 0x4000000000 with
> function h2g:
> https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/syscall.c#L7076
> 
> One solution then, is to update |open_self_maps| in syscall.c to not
> use h2g. However this changes the meaning of QEMU_GUEST_BASE and could
> break existing programs that set non-zero QEMU_GUEST_BASE.
> 
> So, how did qemu pick the base address 0x4000000000 then? Looking at
> the blame output in github, one recent change for the base address was
> committed 10 years ago:
> https://github.com/qemu/qemu/c|open_self_maps| in
> syscall.commit/14f24e1465edc44b9b4d89fbbea66e06088154e1
> 
> Another one was committed 12 years ago:
> https://github.com/qemu/qemu/commit/a03e2d421e7f33316750d6b7396d1a7e14b18d53
> 
> The description of the first change is "place the default mapping base
> for 64-bit guests (on 64-bit hosts) outside the low 4G". It would seem
> that minimum requirements for the base address are:
> 1) addr >= 4G (for 64-bit)
> 2) addr < lowest address used by the host qemu program by some margin
> 
> Given that
> 1) only TSAN explicitly check for the validity of addresses
> 2) 0x4000000000 is not a valid address for programs on aarch64
> (according to TSAN)
> 3) different architectures have different valid addresses,
> it would seem that adding an argument for changing the hard-coded base
> address is a viable solution.

Thank you for the detailed explanation.

Could you show me an example of the QEMU command line you use?

I'm wondering if hardcoding directly the good value would be a better
solution?

Richard, do you have some thoughts on this?

Thanks,
Laurent
Lirong Yuan March 2, 2020, 7:51 p.m. UTC | #5
On Mon, Mar 2, 2020 at 10:39 AM Laurent Vivier <laurent@vivier.eu> wrote:
>
> Le 02/03/2020 à 18:53, Lirong Yuan a écrit :
> > On Mon, Mar 2, 2020 at 6:56 AM Laurent Vivier <laurent@vivier.eu> wrote:
> >>
> >> Le 29/02/2020 à 01:43, Lirong Yuan a écrit :
> >>> On Fri, Feb 21, 2020 at 5:09 PM Lirong Yuan <yuanzi@google.com> wrote:
> >>>>
> >>>> This change allows us to set custom base address for guest programs. It is needed to allow qemu to work with Thread Sanitizer (TSan), which has specific boundary definitions for memory mappings on different platforms:
> >>>> https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h
> >>
> >> Could you give more details and some examples?
> >>
> >> Thanks,
> >> Laurent
> >>
> >>>> Signed-off-by: Lirong Yuan <yuanzi@google.com>
> >>>> ---
> >>>>  linux-user/main.c | 12 ++++++++++++
> >>>>  linux-user/mmap.c |  3 ++-
> >>>>  linux-user/qemu.h |  5 +++++
> >>>>  3 files changed, 19 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/linux-user/main.c b/linux-user/main.c
> >>>> index fba833aac9..c01af6bfee 100644
> >>>> --- a/linux-user/main.c
> >>>> +++ b/linux-user/main.c
> >>>> @@ -336,6 +336,16 @@ static void handle_arg_guest_base(const char *arg)
> >>>>      have_guest_base = 1;
> >>>>  }
> >>>>
> >>>> +static void handle_arg_mmap_base(const char *arg)
> >>>> +{
> >>>> +    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
> >>>> +    if (err) {
> >>>> +        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n", arg, err);
> >>>> +        exit(EXIT_FAILURE);
> >>>> +    }
> >>>> +    mmap_next_start = mmap_base;
> >>>> +}
> >>>> +
> >>>>  static void handle_arg_reserved_va(const char *arg)
> >>>>  {
> >>>>      char *p;
> >>>> @@ -440,6 +450,8 @@ static const struct qemu_argument arg_table[] = {
> >>>>       "uname",      "set qemu uname release string to 'uname'"},
> >>>>      {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
> >>>>       "address",    "set guest_base address to 'address'"},
> >>>> +    {"mmap_base",  "QEMU_MMAP_BASE",   true,  handle_arg_mmap_base,
> >>>> +     "",           "begin allocating guest pages at this host address"},
> >>>>      {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
> >>>>       "size",       "reserve 'size' bytes for guest virtual address space"},
> >>>>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
> >>>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> >>>> index 8685f02e7e..3f35543acf 100644
> >>>> --- a/linux-user/mmap.c
> >>>> +++ b/linux-user/mmap.c
> >>>> @@ -189,6 +189,7 @@ static int mmap_frag(abi_ulong real_start,
> >>>>  # define TASK_UNMAPPED_BASE  0x40000000
> >>>>  #endif
> >>>>  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
> >>>> +abi_ulong mmap_base = TASK_UNMAPPED_BASE;
> >>>>
> >>>>  unsigned long last_brk;
> >>>>
> >>>> @@ -299,7 +300,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
> >>>>
> >>>>              if ((addr & (align - 1)) == 0) {
> >>>>                  /* Success.  */
> >>>> -                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
> >>>> +                if (start == mmap_next_start && addr >= mmap_base) {
> >>>>                      mmap_next_start = addr + size;
> >>>>                  }
> >>>>                  return addr;
> >>>> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> >>>> index 560a68090e..83c00cfea2 100644
> >>>> --- a/linux-user/qemu.h
> >>>> +++ b/linux-user/qemu.h
> >>>> @@ -161,6 +161,11 @@ void task_settid(TaskState *);
> >>>>  void stop_all_tasks(void);
> >>>>  extern const char *qemu_uname_release;
> >>>>  extern unsigned long mmap_min_addr;
> >>>> +/*
> >>>> + * mmap_base is minimum address to use when allocating guest pages. All guest
> >>>> + * pages will be allocated at this (guest) address or higher addresses.
> >>>> + */
> >>>> +extern abi_ulong mmap_base;
> >>>>
> >>>>  /* ??? See if we can avoid exposing so much of the loader internals.  */
> >>>>
> >>>> --
> >>>> 2.25.0.265.gbab2e86ba0-goog
> >>>>
> >>>
> >>> Friendly ping~
> >>>
> >>> Link to the page for the patch on patchwork:
> >>> http://patchwork.ozlabs.org/patch/1242370/
> >>>
> >>
> >
> > Hi Laurent,
> >
> > Sure! We tried to run a program with TSAN enabled
> > (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual)
> > in qemu, and got this error message:
> > "FATAL: ThreadSanitizer: unexpected memory mapping
> > 0x004000000000-0x004000253000"
> >
> > The root cause is that the default guest base address that qemu uses
> > is 0x4000000000 (1ul<<38), and does not align with TSAN's expectation:
> > https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/mmap.c#L187
> > https://github.com/llvm/llvm-project/blob/e7de00cf974a4e30d4900518ae8473a117efbd6c/compiler-rt/lib/tsan/rtl/tsan_platform.h#L150
> >
> > By setting QEMU_GUEST_BASE, we can place the guest program at a
> > different base address in the host program. However, the h2g function
> > (in |open_self_maps| in syscall.c) translates the address back to be
> > based at 0x4000000000. E.g. the base address
> > 0x4000000000+QEMU_GUEST_BASE will be converted to 0x4000000000 with
> > function h2g:
> > https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/syscall.c#L7076
> >
> > One solution then, is to update |open_self_maps| in syscall.c to not
> > use h2g. However this changes the meaning of QEMU_GUEST_BASE and could
> > break existing programs that set non-zero QEMU_GUEST_BASE.
> >
> > So, how did qemu pick the base address 0x4000000000 then? Looking at
> > the blame output in github, one recent change for the base address was
> > committed 10 years ago:
> > https://github.com/qemu/qemu/c|open_self_maps| in
> > syscall.commit/14f24e1465edc44b9b4d89fbbea66e06088154e1
> >
> > Another one was committed 12 years ago:
> > https://github.com/qemu/qemu/commit/a03e2d421e7f33316750d6b7396d1a7e14b18d53
> >
> > The description of the first change is "place the default mapping base
> > for 64-bit guests (on 64-bit hosts) outside the low 4G". It would seem
> > that minimum requirements for the base address are:
> > 1) addr >= 4G (for 64-bit)
> > 2) addr < lowest address used by the host qemu program by some margin
> >
> > Given that
> > 1) only TSAN explicitly check for the validity of addresses
> > 2) 0x4000000000 is not a valid address for programs on aarch64
> > (according to TSAN)
> > 3) different architectures have different valid addresses,
> > it would seem that adding an argument for changing the hard-coded base
> > address is a viable solution.
>
> Thank you for the detailed explanation.
>
> Could you show me an example of the QEMU command line you use?
>
> I'm wondering if hardcoding directly the good value would be a better
> solution?
>
> Richard, do you have some thoughts on this?
>
> Thanks,
> Laurent

Sure! First we compile a simple race program with TSAN enabled:
( Simple race program is here:
https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#usage
)
$ clang++ simple_race.cc -fsanitize=thread -fPIE -pie -g -o simple_race

Next we run a script for executing the program, and it exports
environment variables:
QEMU_CPU=max
QEMU_MMAP_BASE=0x0000005500000000

And runs the QEMU program:
$ qemu-aarch64 simple_race

I changed the default value for all other programs that I am working
with, and so far we haven't seen any problems.
For the patch, it might be better to err on the safe side and not
change the hard-coded value, as it might cause potential breakages for
other users.
Though I don't know much about how the default value might be used or
depended on by other programs, so if you see no concerns for updating
the value, I'd be happy to change it too.
Lirong Yuan March 9, 2020, 6:07 p.m. UTC | #6
On Mon, Mar 2, 2020 at 11:51 AM Lirong Yuan <yuanzi@google.com> wrote:

> On Mon, Mar 2, 2020 at 10:39 AM Laurent Vivier <laurent@vivier.eu> wrote:
> >
> > Le 02/03/2020 à 18:53, Lirong Yuan a écrit :
> > > On Mon, Mar 2, 2020 at 6:56 AM Laurent Vivier <laurent@vivier.eu>
> wrote:
> > >>
> > >> Le 29/02/2020 à 01:43, Lirong Yuan a écrit :
> > >>> On Fri, Feb 21, 2020 at 5:09 PM Lirong Yuan <yuanzi@google.com>
> wrote:
> > >>>>
> > >>>> This change allows us to set custom base address for guest
> programs. It is needed to allow qemu to work with Thread Sanitizer (TSan),
> which has specific boundary definitions for memory mappings on different
> platforms:
> > >>>>
> https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h
> > >>
> > >> Could you give more details and some examples?
> > >>
> > >> Thanks,
> > >> Laurent
> > >>
> > >>>> Signed-off-by: Lirong Yuan <yuanzi@google.com>
> > >>>> ---
> > >>>>  linux-user/main.c | 12 ++++++++++++
> > >>>>  linux-user/mmap.c |  3 ++-
> > >>>>  linux-user/qemu.h |  5 +++++
> > >>>>  3 files changed, 19 insertions(+), 1 deletion(-)
> > >>>>
> > >>>> diff --git a/linux-user/main.c b/linux-user/main.c
> > >>>> index fba833aac9..c01af6bfee 100644
> > >>>> --- a/linux-user/main.c
> > >>>> +++ b/linux-user/main.c
> > >>>> @@ -336,6 +336,16 @@ static void handle_arg_guest_base(const char
> *arg)
> > >>>>      have_guest_base = 1;
> > >>>>  }
> > >>>>
> > >>>> +static void handle_arg_mmap_base(const char *arg)
> > >>>> +{
> > >>>> +    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
> > >>>> +    if (err) {
> > >>>> +        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n", arg,
> err);
> > >>>> +        exit(EXIT_FAILURE);
> > >>>> +    }
> > >>>> +    mmap_next_start = mmap_base;
> > >>>> +}
> > >>>> +
> > >>>>  static void handle_arg_reserved_va(const char *arg)
> > >>>>  {
> > >>>>      char *p;
> > >>>> @@ -440,6 +450,8 @@ static const struct qemu_argument arg_table[] =
> {
> > >>>>       "uname",      "set qemu uname release string to 'uname'"},
> > >>>>      {"B",          "QEMU_GUEST_BASE",  true,
> handle_arg_guest_base,
> > >>>>       "address",    "set guest_base address to 'address'"},
> > >>>> +    {"mmap_base",  "QEMU_MMAP_BASE",   true,  handle_arg_mmap_base,
> > >>>> +     "",           "begin allocating guest pages at this host
> address"},
> > >>>>      {"R",          "QEMU_RESERVED_VA", true,
> handle_arg_reserved_va,
> > >>>>       "size",       "reserve 'size' bytes for guest virtual address
> space"},
> > >>>>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
> > >>>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> > >>>> index 8685f02e7e..3f35543acf 100644
> > >>>> --- a/linux-user/mmap.c
> > >>>> +++ b/linux-user/mmap.c
> > >>>> @@ -189,6 +189,7 @@ static int mmap_frag(abi_ulong real_start,
> > >>>>  # define TASK_UNMAPPED_BASE  0x40000000
> > >>>>  #endif
> > >>>>  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
> > >>>> +abi_ulong mmap_base = TASK_UNMAPPED_BASE;
> > >>>>
> > >>>>  unsigned long last_brk;
> > >>>>
> > >>>> @@ -299,7 +300,7 @@ abi_ulong mmap_find_vma(abi_ulong start,
> abi_ulong size, abi_ulong align)
> > >>>>
> > >>>>              if ((addr & (align - 1)) == 0) {
> > >>>>                  /* Success.  */
> > >>>> -                if (start == mmap_next_start && addr >=
> TASK_UNMAPPED_BASE) {
> > >>>> +                if (start == mmap_next_start && addr >= mmap_base)
> {
> > >>>>                      mmap_next_start = addr + size;
> > >>>>                  }
> > >>>>                  return addr;
> > >>>> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> > >>>> index 560a68090e..83c00cfea2 100644
> > >>>> --- a/linux-user/qemu.h
> > >>>> +++ b/linux-user/qemu.h
> > >>>> @@ -161,6 +161,11 @@ void task_settid(TaskState *);
> > >>>>  void stop_all_tasks(void);
> > >>>>  extern const char *qemu_uname_release;
> > >>>>  extern unsigned long mmap_min_addr;
> > >>>> +/*
> > >>>> + * mmap_base is minimum address to use when allocating guest
> pages. All guest
> > >>>> + * pages will be allocated at this (guest) address or higher
> addresses.
> > >>>> + */
> > >>>> +extern abi_ulong mmap_base;
> > >>>>
> > >>>>  /* ??? See if we can avoid exposing so much of the loader
> internals.  */
> > >>>>
> > >>>> --
> > >>>> 2.25.0.265.gbab2e86ba0-goog
> > >>>>
> > >>>
> > >>> Friendly ping~
> > >>>
> > >>> Link to the page for the patch on patchwork:
> > >>> http://patchwork.ozlabs.org/patch/1242370/
> > >>>
> > >>
> > >
> > > Hi Laurent,
> > >
> > > Sure! We tried to run a program with TSAN enabled
> > > (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual)
> > > in qemu, and got this error message:
> > > "FATAL: ThreadSanitizer: unexpected memory mapping
> > > 0x004000000000-0x004000253000"
> > >
> > > The root cause is that the default guest base address that qemu uses
> > > is 0x4000000000 (1ul<<38), and does not align with TSAN's expectation:
> > >
> https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/mmap.c#L187
> > >
> https://github.com/llvm/llvm-project/blob/e7de00cf974a4e30d4900518ae8473a117efbd6c/compiler-rt/lib/tsan/rtl/tsan_platform.h#L150
> > >
> > > By setting QEMU_GUEST_BASE, we can place the guest program at a
> > > different base address in the host program. However, the h2g function
> > > (in |open_self_maps| in syscall.c) translates the address back to be
> > > based at 0x4000000000. E.g. the base address
> > > 0x4000000000+QEMU_GUEST_BASE will be converted to 0x4000000000 with
> > > function h2g:
> > >
> https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/syscall.c#L7076
> > >
> > > One solution then, is to update |open_self_maps| in syscall.c to not
> > > use h2g. However this changes the meaning of QEMU_GUEST_BASE and could
> > > break existing programs that set non-zero QEMU_GUEST_BASE.
> > >
> > > So, how did qemu pick the base address 0x4000000000 then? Looking at
> > > the blame output in github, one recent change for the base address was
> > > committed 10 years ago:
> > > https://github.com/qemu/qemu/c|open_self_maps| in
> > > syscall.commit/14f24e1465edc44b9b4d89fbbea66e06088154e1
> > >
> > > Another one was committed 12 years ago:
> > >
> https://github.com/qemu/qemu/commit/a03e2d421e7f33316750d6b7396d1a7e14b18d53
> > >
> > > The description of the first change is "place the default mapping base
> > > for 64-bit guests (on 64-bit hosts) outside the low 4G". It would seem
> > > that minimum requirements for the base address are:
> > > 1) addr >= 4G (for 64-bit)
> > > 2) addr < lowest address used by the host qemu program by some margin
> > >
> > > Given that
> > > 1) only TSAN explicitly check for the validity of addresses
> > > 2) 0x4000000000 is not a valid address for programs on aarch64
> > > (according to TSAN)
> > > 3) different architectures have different valid addresses,
> > > it would seem that adding an argument for changing the hard-coded base
> > > address is a viable solution.
> >
> > Thank you for the detailed explanation.
> >
> > Could you show me an example of the QEMU command line you use?
> >
> > I'm wondering if hardcoding directly the good value would be a better
> > solution?
> >
> > Richard, do you have some thoughts on this?
> >
> > Thanks,
> > Laurent
>
> Sure! First we compile a simple race program with TSAN enabled:
> ( Simple race program is here:
> https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#usage
> )
> $ clang++ simple_race.cc -fsanitize=thread -fPIE -pie -g -o simple_race
>
> Next we run a script for executing the program, and it exports
> environment variables:
> QEMU_CPU=max
> QEMU_MMAP_BASE=0x0000005500000000
>
> And runs the QEMU program:
> $ qemu-aarch64 simple_race
>
> I changed the default value for all other programs that I am working
> with, and so far we haven't seen any problems.
> For the patch, it might be better to err on the safe side and not
> change the hard-coded value, as it might cause potential breakages for
> other users.
> Though I don't know much about how the default value might be used or
> depended on by other programs, so if you see no concerns for updating
> the value, I'd be happy to change it too.
>

Friendly ping~

Link to the page for the patch on patchwork:
http://patchwork.ozlabs.org/patch/1242370/
Laurent Vivier March 12, 2020, 8:42 a.m. UTC | #7
Le 09/03/2020 à 19:07, Lirong Yuan a écrit :
> 
> On Mon, Mar 2, 2020 at 11:51 AM Lirong Yuan <yuanzi@google.com
> <mailto:yuanzi@google.com>> wrote:
> 
>     On Mon, Mar 2, 2020 at 10:39 AM Laurent Vivier <laurent@vivier.eu
>     <mailto:laurent@vivier.eu>> wrote:
>     >
>     > Le 02/03/2020 à 18:53, Lirong Yuan a écrit :
>     > > On Mon, Mar 2, 2020 at 6:56 AM Laurent Vivier <laurent@vivier.eu
>     <mailto:laurent@vivier.eu>> wrote:
>     > >>
>     > >> Le 29/02/2020 à 01:43, Lirong Yuan a écrit :
>     > >>> On Fri, Feb 21, 2020 at 5:09 PM Lirong Yuan <yuanzi@google.com
>     <mailto:yuanzi@google.com>> wrote:
>     > >>>>
>     > >>>> This change allows us to set custom base address for guest
>     programs. It is needed to allow qemu to work with Thread Sanitizer
>     (TSan), which has specific boundary definitions for memory mappings
>     on different platforms:
>     > >>>>
>     https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h
>     > >>
>     > >> Could you give more details and some examples?
>     > >>
>     > >> Thanks,
>     > >> Laurent
>     > >>
>     > >>>> Signed-off-by: Lirong Yuan <yuanzi@google.com
>     <mailto:yuanzi@google.com>>
>     > >>>> ---
>     > >>>>  linux-user/main.c | 12 ++++++++++++
>     > >>>>  linux-user/mmap.c |  3 ++-
>     > >>>>  linux-user/qemu.h |  5 +++++
>     > >>>>  3 files changed, 19 insertions(+), 1 deletion(-)
>     > >>>>
>     > >>>> diff --git a/linux-user/main.c b/linux-user/main.c
>     > >>>> index fba833aac9..c01af6bfee 100644
>     > >>>> --- a/linux-user/main.c
>     > >>>> +++ b/linux-user/main.c
>     > >>>> @@ -336,6 +336,16 @@ static void handle_arg_guest_base(const
>     char *arg)
>     > >>>>      have_guest_base = 1;
>     > >>>>  }
>     > >>>>
>     > >>>> +static void handle_arg_mmap_base(const char *arg)
>     > >>>> +{
>     > >>>> +    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
>     > >>>> +    if (err) {
>     > >>>> +        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n",
>     arg, err);
>     > >>>> +        exit(EXIT_FAILURE);
>     > >>>> +    }
>     > >>>> +    mmap_next_start = mmap_base;
>     > >>>> +}
>     > >>>> +
>     > >>>>  static void handle_arg_reserved_va(const char *arg)
>     > >>>>  {
>     > >>>>      char *p;
>     > >>>> @@ -440,6 +450,8 @@ static const struct qemu_argument
>     arg_table[] = {
>     > >>>>       "uname",      "set qemu uname release string to 'uname'"},
>     > >>>>      {"B",          "QEMU_GUEST_BASE",  true, 
>     handle_arg_guest_base,
>     > >>>>       "address",    "set guest_base address to 'address'"},
>     > >>>> +    {"mmap_base",  "QEMU_MMAP_BASE",   true, 
>     handle_arg_mmap_base,
>     > >>>> +     "",           "begin allocating guest pages at this
>     host address"},
>     > >>>>      {"R",          "QEMU_RESERVED_VA", true, 
>     handle_arg_reserved_va,
>     > >>>>       "size",       "reserve 'size' bytes for guest virtual
>     address space"},
>     > >>>>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
>     > >>>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
>     > >>>> index 8685f02e7e..3f35543acf 100644
>     > >>>> --- a/linux-user/mmap.c
>     > >>>> +++ b/linux-user/mmap.c
>     > >>>> @@ -189,6 +189,7 @@ static int mmap_frag(abi_ulong real_start,
>     > >>>>  # define TASK_UNMAPPED_BASE  0x40000000
>     > >>>>  #endif
>     > >>>>  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
>     > >>>> +abi_ulong mmap_base = TASK_UNMAPPED_BASE;
>     > >>>>
>     > >>>>  unsigned long last_brk;
>     > >>>>
>     > >>>> @@ -299,7 +300,7 @@ abi_ulong mmap_find_vma(abi_ulong start,
>     abi_ulong size, abi_ulong align)
>     > >>>>
>     > >>>>              if ((addr & (align - 1)) == 0) {
>     > >>>>                  /* Success.  */
>     > >>>> -                if (start == mmap_next_start && addr >=
>     TASK_UNMAPPED_BASE) {
>     > >>>> +                if (start == mmap_next_start && addr >=
>     mmap_base) {
>     > >>>>                      mmap_next_start = addr + size;
>     > >>>>                  }
>     > >>>>                  return addr;
>     > >>>> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
>     > >>>> index 560a68090e..83c00cfea2 100644
>     > >>>> --- a/linux-user/qemu.h
>     > >>>> +++ b/linux-user/qemu.h
>     > >>>> @@ -161,6 +161,11 @@ void task_settid(TaskState *);
>     > >>>>  void stop_all_tasks(void);
>     > >>>>  extern const char *qemu_uname_release;
>     > >>>>  extern unsigned long mmap_min_addr;
>     > >>>> +/*
>     > >>>> + * mmap_base is minimum address to use when allocating guest
>     pages. All guest
>     > >>>> + * pages will be allocated at this (guest) address or higher
>     addresses.
>     > >>>> + */
>     > >>>> +extern abi_ulong mmap_base;
>     > >>>>
>     > >>>>  /* ??? See if we can avoid exposing so much of the loader
>     internals.  */
>     > >>>>
>     > >>>> --
>     > >>>> 2.25.0.265.gbab2e86ba0-goog
>     > >>>>
>     > >>>
>     > >>> Friendly ping~
>     > >>>
>     > >>> Link to the page for the patch on patchwork:
>     > >>> http://patchwork.ozlabs.org/patch/1242370/
>     > >>>
>     > >>
>     > >
>     > > Hi Laurent,
>     > >
>     > > Sure! We tried to run a program with TSAN enabled
>     > > (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual)
>     > > in qemu, and got this error message:
>     > > "FATAL: ThreadSanitizer: unexpected memory mapping
>     > > 0x004000000000-0x004000253000"
>     > >
>     > > The root cause is that the default guest base address that qemu uses
>     > > is 0x4000000000 (1ul<<38), and does not align with TSAN's
>     expectation:
>     > >
>     https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/mmap.c#L187
>     > >
>     https://github.com/llvm/llvm-project/blob/e7de00cf974a4e30d4900518ae8473a117efbd6c/compiler-rt/lib/tsan/rtl/tsan_platform.h#L150
>     > >
>     > > By setting QEMU_GUEST_BASE, we can place the guest program at a
>     > > different base address in the host program. However, the h2g
>     function
>     > > (in |open_self_maps| in syscall.c) translates the address back to be
>     > > based at 0x4000000000. E.g. the base address
>     > > 0x4000000000+QEMU_GUEST_BASE will be converted to 0x4000000000 with
>     > > function h2g:
>     > >
>     https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/syscall.c#L7076
>     > >
>     > > One solution then, is to update |open_self_maps| in syscall.c to not
>     > > use h2g. However this changes the meaning of QEMU_GUEST_BASE and
>     could
>     > > break existing programs that set non-zero QEMU_GUEST_BASE.
>     > >
>     > > So, how did qemu pick the base address 0x4000000000 then? Looking at
>     > > the blame output in github, one recent change for the base
>     address was
>     > > committed 10 years ago:
>     > > https://github.com/qemu/qemu/c|open_self_maps|
>     <https://github.com/qemu/qemu/c%7Copen_self_maps%7C> in
>     > > syscall.commit/14f24e1465edc44b9b4d89fbbea66e06088154e1
>     > >
>     > > Another one was committed 12 years ago:
>     > >
>     https://github.com/qemu/qemu/commit/a03e2d421e7f33316750d6b7396d1a7e14b18d53
>     > >
>     > > The description of the first change is "place the default
>     mapping base
>     > > for 64-bit guests (on 64-bit hosts) outside the low 4G". It
>     would seem
>     > > that minimum requirements for the base address are:
>     > > 1) addr >= 4G (for 64-bit)
>     > > 2) addr < lowest address used by the host qemu program by some
>     margin
>     > >
>     > > Given that
>     > > 1) only TSAN explicitly check for the validity of addresses
>     > > 2) 0x4000000000 is not a valid address for programs on aarch64
>     > > (according to TSAN)
>     > > 3) different architectures have different valid addresses,
>     > > it would seem that adding an argument for changing the
>     hard-coded base
>     > > address is a viable solution.
>     >
>     > Thank you for the detailed explanation.
>     >
>     > Could you show me an example of the QEMU command line you use?
>     >
>     > I'm wondering if hardcoding directly the good value would be a better
>     > solution?
>     >
>     > Richard, do you have some thoughts on this?
>     >
>     > Thanks,
>     > Laurent
> 
>     Sure! First we compile a simple race program with TSAN enabled:
>     ( Simple race program is here:
>     https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#usage
>     )
>     $ clang++ simple_race.cc -fsanitize=thread -fPIE -pie -g -o simple_race
> 
>     Next we run a script for executing the program, and it exports
>     environment variables:
>     QEMU_CPU=max
>     QEMU_MMAP_BASE=0x0000005500000000
> 
>     And runs the QEMU program:
>     $ qemu-aarch64 simple_race
> 
>     I changed the default value for all other programs that I am working
>     with, and so far we haven't seen any problems.
>     For the patch, it might be better to err on the safe side and not
>     change the hard-coded value, as it might cause potential breakages for
>     other users.
>     Though I don't know much about how the default value might be used or
>     depended on by other programs, so if you see no concerns for updating
>     the value, I'd be happy to change it too.
> 
> 
> Friendly ping~
> 
> Link to the page for the patch on patchwork:
> http://patchwork.ozlabs.org/patch/1242370/ 

I would prefer if you hardcode the value for aarch64 rather than adding
a new parameter.

Thanks,
Laurent
Lirong Yuan March 12, 2020, 10:35 p.m. UTC | #8
On Thu, Mar 12, 2020 at 1:42 AM Laurent Vivier <laurent@vivier.eu> wrote:

> Le 09/03/2020 à 19:07, Lirong Yuan a écrit :
> >
> > On Mon, Mar 2, 2020 at 11:51 AM Lirong Yuan <yuanzi@google.com
> > <mailto:yuanzi@google.com>> wrote:
> >
> >     On Mon, Mar 2, 2020 at 10:39 AM Laurent Vivier <laurent@vivier.eu
> >     <mailto:laurent@vivier.eu>> wrote:
> >     >
> >     > Le 02/03/2020 à 18:53, Lirong Yuan a écrit :
> >     > > On Mon, Mar 2, 2020 at 6:56 AM Laurent Vivier <laurent@vivier.eu
> >     <mailto:laurent@vivier.eu>> wrote:
> >     > >>
> >     > >> Le 29/02/2020 à 01:43, Lirong Yuan a écrit :
> >     > >>> On Fri, Feb 21, 2020 at 5:09 PM Lirong Yuan <yuanzi@google.com
> >     <mailto:yuanzi@google.com>> wrote:
> >     > >>>>
> >     > >>>> This change allows us to set custom base address for guest
> >     programs. It is needed to allow qemu to work with Thread Sanitizer
> >     (TSan), which has specific boundary definitions for memory mappings
> >     on different platforms:
> >     > >>>>
> >
> https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_platform.h
> >     > >>
> >     > >> Could you give more details and some examples?
> >     > >>
> >     > >> Thanks,
> >     > >> Laurent
> >     > >>
> >     > >>>> Signed-off-by: Lirong Yuan <yuanzi@google.com
> >     <mailto:yuanzi@google.com>>
> >     > >>>> ---
> >     > >>>>  linux-user/main.c | 12 ++++++++++++
> >     > >>>>  linux-user/mmap.c |  3 ++-
> >     > >>>>  linux-user/qemu.h |  5 +++++
> >     > >>>>  3 files changed, 19 insertions(+), 1 deletion(-)
> >     > >>>>
> >     > >>>> diff --git a/linux-user/main.c b/linux-user/main.c
> >     > >>>> index fba833aac9..c01af6bfee 100644
> >     > >>>> --- a/linux-user/main.c
> >     > >>>> +++ b/linux-user/main.c
> >     > >>>> @@ -336,6 +336,16 @@ static void handle_arg_guest_base(const
> >     char *arg)
> >     > >>>>      have_guest_base = 1;
> >     > >>>>  }
> >     > >>>>
> >     > >>>> +static void handle_arg_mmap_base(const char *arg)
> >     > >>>> +{
> >     > >>>> +    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
> >     > >>>> +    if (err) {
> >     > >>>> +        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n",
> >     arg, err);
> >     > >>>> +        exit(EXIT_FAILURE);
> >     > >>>> +    }
> >     > >>>> +    mmap_next_start = mmap_base;
> >     > >>>> +}
> >     > >>>> +
> >     > >>>>  static void handle_arg_reserved_va(const char *arg)
> >     > >>>>  {
> >     > >>>>      char *p;
> >     > >>>> @@ -440,6 +450,8 @@ static const struct qemu_argument
> >     arg_table[] = {
> >     > >>>>       "uname",      "set qemu uname release string to
> 'uname'"},
> >     > >>>>      {"B",          "QEMU_GUEST_BASE",  true,
> >     handle_arg_guest_base,
> >     > >>>>       "address",    "set guest_base address to 'address'"},
> >     > >>>> +    {"mmap_base",  "QEMU_MMAP_BASE",   true,
> >     handle_arg_mmap_base,
> >     > >>>> +     "",           "begin allocating guest pages at this
> >     host address"},
> >     > >>>>      {"R",          "QEMU_RESERVED_VA", true,
> >     handle_arg_reserved_va,
> >     > >>>>       "size",       "reserve 'size' bytes for guest virtual
> >     address space"},
> >     > >>>>      {"d",          "QEMU_LOG",         true,  handle_arg_log,
> >     > >>>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> >     > >>>> index 8685f02e7e..3f35543acf 100644
> >     > >>>> --- a/linux-user/mmap.c
> >     > >>>> +++ b/linux-user/mmap.c
> >     > >>>> @@ -189,6 +189,7 @@ static int mmap_frag(abi_ulong real_start,
> >     > >>>>  # define TASK_UNMAPPED_BASE  0x40000000
> >     > >>>>  #endif
> >     > >>>>  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
> >     > >>>> +abi_ulong mmap_base = TASK_UNMAPPED_BASE;
> >     > >>>>
> >     > >>>>  unsigned long last_brk;
> >     > >>>>
> >     > >>>> @@ -299,7 +300,7 @@ abi_ulong mmap_find_vma(abi_ulong start,
> >     abi_ulong size, abi_ulong align)
> >     > >>>>
> >     > >>>>              if ((addr & (align - 1)) == 0) {
> >     > >>>>                  /* Success.  */
> >     > >>>> -                if (start == mmap_next_start && addr >=
> >     TASK_UNMAPPED_BASE) {
> >     > >>>> +                if (start == mmap_next_start && addr >=
> >     mmap_base) {
> >     > >>>>                      mmap_next_start = addr + size;
> >     > >>>>                  }
> >     > >>>>                  return addr;
> >     > >>>> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> >     > >>>> index 560a68090e..83c00cfea2 100644
> >     > >>>> --- a/linux-user/qemu.h
> >     > >>>> +++ b/linux-user/qemu.h
> >     > >>>> @@ -161,6 +161,11 @@ void task_settid(TaskState *);
> >     > >>>>  void stop_all_tasks(void);
> >     > >>>>  extern const char *qemu_uname_release;
> >     > >>>>  extern unsigned long mmap_min_addr;
> >     > >>>> +/*
> >     > >>>> + * mmap_base is minimum address to use when allocating guest
> >     pages. All guest
> >     > >>>> + * pages will be allocated at this (guest) address or higher
> >     addresses.
> >     > >>>> + */
> >     > >>>> +extern abi_ulong mmap_base;
> >     > >>>>
> >     > >>>>  /* ??? See if we can avoid exposing so much of the loader
> >     internals.  */
> >     > >>>>
> >     > >>>> --
> >     > >>>> 2.25.0.265.gbab2e86ba0-goog
> >     > >>>>
> >     > >>>
> >     > >>> Friendly ping~
> >     > >>>
> >     > >>> Link to the page for the patch on patchwork:
> >     > >>> http://patchwork.ozlabs.org/patch/1242370/
> >     > >>>
> >     > >>
> >     > >
> >     > > Hi Laurent,
> >     > >
> >     > > Sure! We tried to run a program with TSAN enabled
> >     > > (
> https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual)
> >     > > in qemu, and got this error message:
> >     > > "FATAL: ThreadSanitizer: unexpected memory mapping
> >     > > 0x004000000000-0x004000253000"
> >     > >
> >     > > The root cause is that the default guest base address that qemu
> uses
> >     > > is 0x4000000000 (1ul<<38), and does not align with TSAN's
> >     expectation:
> >     > >
> >
> https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/mmap.c#L187
> >     > >
> >
> https://github.com/llvm/llvm-project/blob/e7de00cf974a4e30d4900518ae8473a117efbd6c/compiler-rt/lib/tsan/rtl/tsan_platform.h#L150
> >     > >
> >     > > By setting QEMU_GUEST_BASE, we can place the guest program at a
> >     > > different base address in the host program. However, the h2g
> >     function
> >     > > (in |open_self_maps| in syscall.c) translates the address back
> to be
> >     > > based at 0x4000000000. E.g. the base address
> >     > > 0x4000000000+QEMU_GUEST_BASE will be converted to 0x4000000000
> with
> >     > > function h2g:
> >     > >
> >
> https://github.com/qemu/qemu/blob/c81acb643a61db199b9198add7972d8a8496b27c/linux-user/syscall.c#L7076
> >     > >
> >     > > One solution then, is to update |open_self_maps| in syscall.c to
> not
> >     > > use h2g. However this changes the meaning of QEMU_GUEST_BASE and
> >     could
> >     > > break existing programs that set non-zero QEMU_GUEST_BASE.
> >     > >
> >     > > So, how did qemu pick the base address 0x4000000000 then?
> Looking at
> >     > > the blame output in github, one recent change for the base
> >     address was
> >     > > committed 10 years ago:
> >     > > https://github.com/qemu/qemu/c|open_self_maps|
> >     <https://github.com/qemu/qemu/c%7Copen_self_maps%7C> in
> >     > > syscall.commit/14f24e1465edc44b9b4d89fbbea66e06088154e1
> >     > >
> >     > > Another one was committed 12 years ago:
> >     > >
> >
> https://github.com/qemu/qemu/commit/a03e2d421e7f33316750d6b7396d1a7e14b18d53
> >     > >
> >     > > The description of the first change is "place the default
> >     mapping base
> >     > > for 64-bit guests (on 64-bit hosts) outside the low 4G". It
> >     would seem
> >     > > that minimum requirements for the base address are:
> >     > > 1) addr >= 4G (for 64-bit)
> >     > > 2) addr < lowest address used by the host qemu program by some
> >     margin
> >     > >
> >     > > Given that
> >     > > 1) only TSAN explicitly check for the validity of addresses
> >     > > 2) 0x4000000000 is not a valid address for programs on aarch64
> >     > > (according to TSAN)
> >     > > 3) different architectures have different valid addresses,
> >     > > it would seem that adding an argument for changing the
> >     hard-coded base
> >     > > address is a viable solution.
> >     >
> >     > Thank you for the detailed explanation.
> >     >
> >     > Could you show me an example of the QEMU command line you use?
> >     >
> >     > I'm wondering if hardcoding directly the good value would be a
> better
> >     > solution?
> >     >
> >     > Richard, do you have some thoughts on this?
> >     >
> >     > Thanks,
> >     > Laurent
> >
> >     Sure! First we compile a simple race program with TSAN enabled:
> >     ( Simple race program is here:
> >
> https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#usage
> >     )
> >     $ clang++ simple_race.cc -fsanitize=thread -fPIE -pie -g -o
> simple_race
> >
> >     Next we run a script for executing the program, and it exports
> >     environment variables:
> >     QEMU_CPU=max
> >     QEMU_MMAP_BASE=0x0000005500000000
> >
> >     And runs the QEMU program:
> >     $ qemu-aarch64 simple_race
> >
> >     I changed the default value for all other programs that I am working
> >     with, and so far we haven't seen any problems.
> >     For the patch, it might be better to err on the safe side and not
> >     change the hard-coded value, as it might cause potential breakages
> for
> >     other users.
> >     Though I don't know much about how the default value might be used or
> >     depended on by other programs, so if you see no concerns for updating
> >     the value, I'd be happy to change it too.
> >
> >
> > Friendly ping~
> >
> > Link to the page for the patch on patchwork:
> > http://patchwork.ozlabs.org/patch/1242370/
>
> I would prefer if you hardcode the value for aarch64 rather than adding
> a new parameter.
>
> Thanks,
> Laurent
>

For sure! I will send a patch shortly for hardcoding the value on aarch64.

Note that although the value has been working fine for our tests, we are
not sure that it won't break other tests on other systems.

Regards,
Lirong

Patch
diff mbox series

diff --git a/linux-user/main.c b/linux-user/main.c
index fba833aac9..c01af6bfee 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -336,6 +336,16 @@  static void handle_arg_guest_base(const char *arg)
     have_guest_base = 1;
 }
 
+static void handle_arg_mmap_base(const char *arg)
+{
+    int err = qemu_strtoul(arg, NULL, 0, &mmap_base);
+    if (err) {
+        fprintf(stderr, "Invalid mmap_base: %s, err: %d\n", arg, err);
+        exit(EXIT_FAILURE);
+    }
+    mmap_next_start = mmap_base;
+}
+
 static void handle_arg_reserved_va(const char *arg)
 {
     char *p;
@@ -440,6 +450,8 @@  static const struct qemu_argument arg_table[] = {
      "uname",      "set qemu uname release string to 'uname'"},
     {"B",          "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
      "address",    "set guest_base address to 'address'"},
+    {"mmap_base",  "QEMU_MMAP_BASE",   true,  handle_arg_mmap_base,
+     "",           "begin allocating guest pages at this host address"},
     {"R",          "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
      "size",       "reserve 'size' bytes for guest virtual address space"},
     {"d",          "QEMU_LOG",         true,  handle_arg_log,
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 8685f02e7e..3f35543acf 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -189,6 +189,7 @@  static int mmap_frag(abi_ulong real_start,
 # define TASK_UNMAPPED_BASE  0x40000000
 #endif
 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
+abi_ulong mmap_base = TASK_UNMAPPED_BASE;
 
 unsigned long last_brk;
 
@@ -299,7 +300,7 @@  abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
 
             if ((addr & (align - 1)) == 0) {
                 /* Success.  */
-                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
+                if (start == mmap_next_start && addr >= mmap_base) {
                     mmap_next_start = addr + size;
                 }
                 return addr;
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 560a68090e..83c00cfea2 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -161,6 +161,11 @@  void task_settid(TaskState *);
 void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
+/*
+ * mmap_base is minimum address to use when allocating guest pages. All guest
+ * pages will be allocated at this (guest) address or higher addresses.
+ */
+extern abi_ulong mmap_base;
 
 /* ??? See if we can avoid exposing so much of the loader internals.  */