diff mbox series

[v6,8/8] linux-user: Load pie executables at upper memory

Message ID 20230801232745.4125-9-deller@gmx.de
State New
Headers show
Series linux-user: brk fixes | expand

Commit Message

Helge Deller Aug. 1, 2023, 11:27 p.m. UTC
Fix the elf loader to calculate a valid TASK_UNMAPPED_BASE address for all
32-bit architectures, based on the GUEST_ADDR_MAX constant.

Additionally modify the elf loader to load dynamic pie executables at
around:
~ 0x5500000000  for 64-bit guest binaries on 64-bit host,
- 0x00300000    for 32-bit guest binaries on 64-bit host, and
- 0x00000000    for 32-bit guest binaries on 32-bit host.

With this patch the Thread Sanitizer (TSan) application will work again,
as in commit aab613fb9597 ("linux-user: Update TASK_UNMAPPED_BASE for
aarch64").

Signed-off-by: Helge Deller <deller@gmx.de>
---
 linux-user/elfload.c |  6 ++++--
 linux-user/loader.h  | 12 ++++++++++++
 linux-user/mmap.c    | 35 ++++++++++++++++++-----------------
 3 files changed, 34 insertions(+), 19 deletions(-)

--
2.41.0

Comments

Akihiko Odaki Aug. 2, 2023, 7:49 a.m. UTC | #1
On 2023/08/02 8:27, Helge Deller wrote:
> Fix the elf loader to calculate a valid TASK_UNMAPPED_BASE address for all
> 32-bit architectures, based on the GUEST_ADDR_MAX constant.
> 
> Additionally modify the elf loader to load dynamic pie executables at
> around:
> ~ 0x5500000000  for 64-bit guest binaries on 64-bit host,
> - 0x00300000    for 32-bit guest binaries on 64-bit host, and
> - 0x00000000    for 32-bit guest binaries on 32-bit host.

Why do you change guest addresses depending on the host?

> 
> With this patch the Thread Sanitizer (TSan) application will work again,
> as in commit aab613fb9597 ("linux-user: Update TASK_UNMAPPED_BASE for
> aarch64").
> 
> Signed-off-by: Helge Deller <deller@gmx.de>
> ---
>   linux-user/elfload.c |  6 ++++--
>   linux-user/loader.h  | 12 ++++++++++++
>   linux-user/mmap.c    | 35 ++++++++++++++++++-----------------
>   3 files changed, 34 insertions(+), 19 deletions(-)
> 
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 47a118e430..8f5a79b537 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -3021,6 +3021,7 @@ static void load_elf_image(const char *image_name, int image_fd,
>       struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
>       struct elf_phdr *phdr;
>       abi_ulong load_addr, load_bias, loaddr, hiaddr, error;
> +    unsigned long load_offset = 0;
>       int i, retval, prot_exec;
>       Error *err = NULL;
>       bool is_main_executable;
> @@ -3121,6 +3122,7 @@ static void load_elf_image(const char *image_name, int image_fd,
>                * select guest_base.  In this case we pass a size.
>                */
>               probe_guest_base(image_name, 0, hiaddr - loaddr);
> +            load_offset = TASK_UNMAPPED_BASE_PIE;
>           }
>       }
> 
> @@ -3138,7 +3140,7 @@ static void load_elf_image(const char *image_name, int image_fd,
>        * In both cases, we will overwrite pages in this range with mappings
>        * from the executable.
>        */
> -    load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
> +    load_addr = target_mmap(loaddr + load_offset, (size_t)hiaddr - loaddr + 1, PROT_NONE,
>                               MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
>                               (is_main_executable ? MAP_FIXED : 0),
>                               -1, 0);
> @@ -3176,7 +3178,7 @@ static void load_elf_image(const char *image_name, int image_fd,
>       info->start_data = -1;
>       info->end_data = 0;
>       /* possible start for brk is behind all sections of this ELF file. */
> -    info->brk = TARGET_PAGE_ALIGN(hiaddr);
> +    info->brk = TARGET_PAGE_ALIGN(load_offset + hiaddr);
>       info->elf_flags = ehdr->e_flags;
> 
>       prot_exec = PROT_EXEC;
> diff --git a/linux-user/loader.h b/linux-user/loader.h
> index 59cbeacf24..3bbfc108eb 100644
> --- a/linux-user/loader.h
> +++ b/linux-user/loader.h
> @@ -18,6 +18,18 @@
>   #ifndef LINUX_USER_LOADER_H
>   #define LINUX_USER_LOADER_H
> 
> +/* where to map binaries? */
> +#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
> +# define TASK_UNMAPPED_BASE_PIE 0x5500000000
> +# define TASK_UNMAPPED_BASE	0x7000000000
> +#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
> +# define TASK_UNMAPPED_BASE_PIE	0x00300000
> +# define TASK_UNMAPPED_BASE	(GUEST_ADDR_MAX - 0x20000000 + 1)
> +#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
> +# define TASK_UNMAPPED_BASE_PIE	0x00000000
> +# define TASK_UNMAPPED_BASE	0x40000000
> +#endif
> +
>   /*
>    * Read a good amount of data initially, to hopefully get all the
>    * program headers loaded.
> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> index c624feead0..3441198e21 100644
> --- a/linux-user/mmap.c
> +++ b/linux-user/mmap.c
> @@ -23,6 +23,7 @@
>   #include "user-internals.h"
>   #include "user-mmap.h"
>   #include "target_mman.h"
> +#include "loader.h"
> 
>   static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
>   static __thread int mmap_lock_count;
> @@ -295,23 +296,6 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
>       return true;
>   }
> 
> -#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
> -#ifdef TARGET_AARCH64
> -# define TASK_UNMAPPED_BASE  0x5500000000
> -#else
> -# define TASK_UNMAPPED_BASE  0x4000000000
> -#endif
> -#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
> -#ifdef TARGET_HPPA
> -# define TASK_UNMAPPED_BASE  0xfa000000
> -#else
> -# define TASK_UNMAPPED_BASE  0xe0000000
> -#endif
> -#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
> -# define TASK_UNMAPPED_BASE  0x40000000
> -#endif
> -abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
> -
>   unsigned long last_brk;
> 
>   /*
> @@ -344,6 +328,23 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
>       abi_ulong addr;
>       int wrapped, repeat;
> 
> +    static abi_ulong mmap_next_start;
> +
> +    /* initialize mmap_next_start if necessary */
> +    if (!mmap_next_start) {
> +        mmap_next_start = TASK_UNMAPPED_BASE;
> +
> +        /* do sanity checks on guest memory layout */
> +        if (mmap_next_start >= GUEST_ADDR_MAX) {
> +            mmap_next_start = GUEST_ADDR_MAX - 0x1000000000 + 1;

What if GUEST_ADDR_MAX < 0x1000000000? I think you can just return a 
hard error when mmap_next_start >= GUEST_ADDR_MAX.

> +        }
> +
> +        if (TASK_UNMAPPED_BASE_PIE >= mmap_next_start) {
> +            fprintf(stderr, "Memory too small for PIE executables.\n");

Perhaps it's better to use error_report() for new code.

> +            exit(EXIT_FAILURE);
> +        }
> +    }
> +
>       align = MAX(align, qemu_host_page_size);
> 
>       /* If 'start' == 0, then a default start address is used. */
> --
> 2.41.0
>
Helge Deller Aug. 2, 2023, 8:42 a.m. UTC | #2
On 8/2/23 09:49, Akihiko Odaki wrote:
> On 2023/08/02 8:27, Helge Deller wrote:
>> Fix the elf loader to calculate a valid TASK_UNMAPPED_BASE address for all
>> 32-bit architectures, based on the GUEST_ADDR_MAX constant.
>>
>> Additionally modify the elf loader to load dynamic pie executables at
>> around:
>> ~ 0x5500000000  for 64-bit guest binaries on 64-bit host,
>> - 0x00300000    for 32-bit guest binaries on 64-bit host, and
>> - 0x00000000    for 32-bit guest binaries on 32-bit host.
>
> Why do you change guest addresses depending on the host?

The addresses are guest-addresses.
A 32-bit guest PIE can't be loaded at e.g. 0x5500000000,
while a 64-bit guest PIE needs to be loaded at 0x5500000000.

>> With this patch the Thread Sanitizer (TSan) application will work again,
>> as in commit aab613fb9597 ("linux-user: Update TASK_UNMAPPED_BASE for
>> aarch64").
>>
>> Signed-off-by: Helge Deller <deller@gmx.de>
>> ---
>>   linux-user/elfload.c |  6 ++++--
>>   linux-user/loader.h  | 12 ++++++++++++
>>   linux-user/mmap.c    | 35 ++++++++++++++++++-----------------
>>   3 files changed, 34 insertions(+), 19 deletions(-)
>>
>> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
>> index 47a118e430..8f5a79b537 100644
>> --- a/linux-user/elfload.c
>> +++ b/linux-user/elfload.c
>> @@ -3021,6 +3021,7 @@ static void load_elf_image(const char *image_name, int image_fd,
>>       struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
>>       struct elf_phdr *phdr;
>>       abi_ulong load_addr, load_bias, loaddr, hiaddr, error;
>> +    unsigned long load_offset = 0;
>>       int i, retval, prot_exec;
>>       Error *err = NULL;
>>       bool is_main_executable;
>> @@ -3121,6 +3122,7 @@ static void load_elf_image(const char *image_name, int image_fd,
>>                * select guest_base.  In this case we pass a size.
>>                */
>>               probe_guest_base(image_name, 0, hiaddr - loaddr);
>> +            load_offset = TASK_UNMAPPED_BASE_PIE;
>>           }
>>       }
>>
>> @@ -3138,7 +3140,7 @@ static void load_elf_image(const char *image_name, int image_fd,
>>        * In both cases, we will overwrite pages in this range with mappings
>>        * from the executable.
>>        */
>> -    load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
>> +    load_addr = target_mmap(loaddr + load_offset, (size_t)hiaddr - loaddr + 1, PROT_NONE,
>>                               MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
>>                               (is_main_executable ? MAP_FIXED : 0),
>>                               -1, 0);
>> @@ -3176,7 +3178,7 @@ static void load_elf_image(const char *image_name, int image_fd,
>>       info->start_data = -1;
>>       info->end_data = 0;
>>       /* possible start for brk is behind all sections of this ELF file. */
>> -    info->brk = TARGET_PAGE_ALIGN(hiaddr);
>> +    info->brk = TARGET_PAGE_ALIGN(load_offset + hiaddr);
>>       info->elf_flags = ehdr->e_flags;
>>
>>       prot_exec = PROT_EXEC;
>> diff --git a/linux-user/loader.h b/linux-user/loader.h
>> index 59cbeacf24..3bbfc108eb 100644
>> --- a/linux-user/loader.h
>> +++ b/linux-user/loader.h
>> @@ -18,6 +18,18 @@
>>   #ifndef LINUX_USER_LOADER_H
>>   #define LINUX_USER_LOADER_H
>>
>> +/* where to map binaries? */
>> +#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
>> +# define TASK_UNMAPPED_BASE_PIE 0x5500000000
>> +# define TASK_UNMAPPED_BASE    0x7000000000
>> +#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
>> +# define TASK_UNMAPPED_BASE_PIE    0x00300000
>> +# define TASK_UNMAPPED_BASE    (GUEST_ADDR_MAX - 0x20000000 + 1)
>> +#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
>> +# define TASK_UNMAPPED_BASE_PIE    0x00000000
>> +# define TASK_UNMAPPED_BASE    0x40000000
>> +#endif
>> +
>>   /*
>>    * Read a good amount of data initially, to hopefully get all the
>>    * program headers loaded.
>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
>> index c624feead0..3441198e21 100644
>> --- a/linux-user/mmap.c
>> +++ b/linux-user/mmap.c
>> @@ -23,6 +23,7 @@
>>   #include "user-internals.h"
>>   #include "user-mmap.h"
>>   #include "target_mman.h"
>> +#include "loader.h"
>>
>>   static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
>>   static __thread int mmap_lock_count;
>> @@ -295,23 +296,6 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
>>       return true;
>>   }
>>
>> -#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
>> -#ifdef TARGET_AARCH64
>> -# define TASK_UNMAPPED_BASE  0x5500000000
>> -#else
>> -# define TASK_UNMAPPED_BASE  0x4000000000
>> -#endif
>> -#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
>> -#ifdef TARGET_HPPA
>> -# define TASK_UNMAPPED_BASE  0xfa000000
>> -#else
>> -# define TASK_UNMAPPED_BASE  0xe0000000
>> -#endif
>> -#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
>> -# define TASK_UNMAPPED_BASE  0x40000000
>> -#endif
>> -abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
>> -
>>   unsigned long last_brk;
>>
>>   /*
>> @@ -344,6 +328,23 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
>>       abi_ulong addr;
>>       int wrapped, repeat;
>>
>> +    static abi_ulong mmap_next_start;
>> +
>> +    /* initialize mmap_next_start if necessary */
>> +    if (!mmap_next_start) {
>> +        mmap_next_start = TASK_UNMAPPED_BASE;
>> +
>> +        /* do sanity checks on guest memory layout */
>> +        if (mmap_next_start >= GUEST_ADDR_MAX) {
>> +            mmap_next_start = GUEST_ADDR_MAX - 0x1000000000 + 1;
>
> What if GUEST_ADDR_MAX < 0x1000000000?

this check affects 64-bit executables only where GUEST_ADDR_MAX is bigger
than that number. But I agree it's not directly visible from the code.
32-bit ones are taken care of where TASK_UNMAPPED_BASE is defined.

> I think you can just return a hard error when mmap_next_start >= GUEST_ADDR_MAX.

Can't happen, but I will rewrite it.

>> +        }
>> +
>> +        if (TASK_UNMAPPED_BASE_PIE >= mmap_next_start) {
>> +            fprintf(stderr, "Memory too small for PIE executables.\n");
>
> Perhaps it's better to use error_report() for new code.

Ok.

Helge
Akihiko Odaki Aug. 2, 2023, 8:44 a.m. UTC | #3
On 2023/08/02 17:42, Helge Deller wrote:
> On 8/2/23 09:49, Akihiko Odaki wrote:
>> On 2023/08/02 8:27, Helge Deller wrote:
>>> Fix the elf loader to calculate a valid TASK_UNMAPPED_BASE address 
>>> for all
>>> 32-bit architectures, based on the GUEST_ADDR_MAX constant.
>>>
>>> Additionally modify the elf loader to load dynamic pie executables at
>>> around:
>>> ~ 0x5500000000  for 64-bit guest binaries on 64-bit host,
>>> - 0x00300000    for 32-bit guest binaries on 64-bit host, and
>>> - 0x00000000    for 32-bit guest binaries on 32-bit host.
>>
>> Why do you change guest addresses depending on the host?
> 
> The addresses are guest-addresses.
> A 32-bit guest PIE can't be loaded at e.g. 0x5500000000,
> while a 64-bit guest PIE needs to be loaded at 0x5500000000.

I mean, why do you use address 0x00000000 for 32-bit guest binaries on 
32-bit host while you use address 0x00300000 on 64-bit host?

> 
>>> With this patch the Thread Sanitizer (TSan) application will work again,
>>> as in commit aab613fb9597 ("linux-user: Update TASK_UNMAPPED_BASE for
>>> aarch64").
>>>
>>> Signed-off-by: Helge Deller <deller@gmx.de>
>>> ---
>>>   linux-user/elfload.c |  6 ++++--
>>>   linux-user/loader.h  | 12 ++++++++++++
>>>   linux-user/mmap.c    | 35 ++++++++++++++++++-----------------
>>>   3 files changed, 34 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
>>> index 47a118e430..8f5a79b537 100644
>>> --- a/linux-user/elfload.c
>>> +++ b/linux-user/elfload.c
>>> @@ -3021,6 +3021,7 @@ static void load_elf_image(const char 
>>> *image_name, int image_fd,
>>>       struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
>>>       struct elf_phdr *phdr;
>>>       abi_ulong load_addr, load_bias, loaddr, hiaddr, error;
>>> +    unsigned long load_offset = 0;
>>>       int i, retval, prot_exec;
>>>       Error *err = NULL;
>>>       bool is_main_executable;
>>> @@ -3121,6 +3122,7 @@ static void load_elf_image(const char 
>>> *image_name, int image_fd,
>>>                * select guest_base.  In this case we pass a size.
>>>                */
>>>               probe_guest_base(image_name, 0, hiaddr - loaddr);
>>> +            load_offset = TASK_UNMAPPED_BASE_PIE;
>>>           }
>>>       }
>>>
>>> @@ -3138,7 +3140,7 @@ static void load_elf_image(const char 
>>> *image_name, int image_fd,
>>>        * In both cases, we will overwrite pages in this range with 
>>> mappings
>>>        * from the executable.
>>>        */
>>> -    load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, 
>>> PROT_NONE,
>>> +    load_addr = target_mmap(loaddr + load_offset, (size_t)hiaddr - 
>>> loaddr + 1, PROT_NONE,
>>>                               MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
>>>                               (is_main_executable ? MAP_FIXED : 0),
>>>                               -1, 0);
>>> @@ -3176,7 +3178,7 @@ static void load_elf_image(const char 
>>> *image_name, int image_fd,
>>>       info->start_data = -1;
>>>       info->end_data = 0;
>>>       /* possible start for brk is behind all sections of this ELF 
>>> file. */
>>> -    info->brk = TARGET_PAGE_ALIGN(hiaddr);
>>> +    info->brk = TARGET_PAGE_ALIGN(load_offset + hiaddr);
>>>       info->elf_flags = ehdr->e_flags;
>>>
>>>       prot_exec = PROT_EXEC;
>>> diff --git a/linux-user/loader.h b/linux-user/loader.h
>>> index 59cbeacf24..3bbfc108eb 100644
>>> --- a/linux-user/loader.h
>>> +++ b/linux-user/loader.h
>>> @@ -18,6 +18,18 @@
>>>   #ifndef LINUX_USER_LOADER_H
>>>   #define LINUX_USER_LOADER_H
>>>
>>> +/* where to map binaries? */
>>> +#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
>>> +# define TASK_UNMAPPED_BASE_PIE 0x5500000000
>>> +# define TASK_UNMAPPED_BASE    0x7000000000
>>> +#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
>>> +# define TASK_UNMAPPED_BASE_PIE    0x00300000
>>> +# define TASK_UNMAPPED_BASE    (GUEST_ADDR_MAX - 0x20000000 + 1)
>>> +#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
>>> +# define TASK_UNMAPPED_BASE_PIE    0x00000000
>>> +# define TASK_UNMAPPED_BASE    0x40000000
>>> +#endif
>>> +
>>>   /*
>>>    * Read a good amount of data initially, to hopefully get all the
>>>    * program headers loaded.
>>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
>>> index c624feead0..3441198e21 100644
>>> --- a/linux-user/mmap.c
>>> +++ b/linux-user/mmap.c
>>> @@ -23,6 +23,7 @@
>>>   #include "user-internals.h"
>>>   #include "user-mmap.h"
>>>   #include "target_mman.h"
>>> +#include "loader.h"
>>>
>>>   static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
>>>   static __thread int mmap_lock_count;
>>> @@ -295,23 +296,6 @@ static bool mmap_frag(abi_ulong real_start, 
>>> abi_ulong start, abi_ulong last,
>>>       return true;
>>>   }
>>>
>>> -#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
>>> -#ifdef TARGET_AARCH64
>>> -# define TASK_UNMAPPED_BASE  0x5500000000
>>> -#else
>>> -# define TASK_UNMAPPED_BASE  0x4000000000
>>> -#endif
>>> -#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
>>> -#ifdef TARGET_HPPA
>>> -# define TASK_UNMAPPED_BASE  0xfa000000
>>> -#else
>>> -# define TASK_UNMAPPED_BASE  0xe0000000
>>> -#endif
>>> -#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
>>> -# define TASK_UNMAPPED_BASE  0x40000000
>>> -#endif
>>> -abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
>>> -
>>>   unsigned long last_brk;
>>>
>>>   /*
>>> @@ -344,6 +328,23 @@ abi_ulong mmap_find_vma(abi_ulong start, 
>>> abi_ulong size, abi_ulong align)
>>>       abi_ulong addr;
>>>       int wrapped, repeat;
>>>
>>> +    static abi_ulong mmap_next_start;
>>> +
>>> +    /* initialize mmap_next_start if necessary */
>>> +    if (!mmap_next_start) {
>>> +        mmap_next_start = TASK_UNMAPPED_BASE;
>>> +
>>> +        /* do sanity checks on guest memory layout */
>>> +        if (mmap_next_start >= GUEST_ADDR_MAX) {
>>> +            mmap_next_start = GUEST_ADDR_MAX - 0x1000000000 + 1;
>>
>> What if GUEST_ADDR_MAX < 0x1000000000?
> 
> this check affects 64-bit executables only where GUEST_ADDR_MAX is bigger
> than that number. But I agree it's not directly visible from the code.
> 32-bit ones are taken care of where TASK_UNMAPPED_BASE is defined.
> 
>> I think you can just return a hard error when mmap_next_start >= 
>> GUEST_ADDR_MAX.
> 
> Can't happen, but I will rewrite it.
> 
>>> +        }
>>> +
>>> +        if (TASK_UNMAPPED_BASE_PIE >= mmap_next_start) {
>>> +            fprintf(stderr, "Memory too small for PIE executables.\n");
>>
>> Perhaps it's better to use error_report() for new code.
> 
> Ok.
> 
> Helge
Helge Deller Aug. 2, 2023, 9:34 a.m. UTC | #4
On 8/2/23 10:44, Akihiko Odaki wrote:
> On 2023/08/02 17:42, Helge Deller wrote:
>> On 8/2/23 09:49, Akihiko Odaki wrote:
>>> On 2023/08/02 8:27, Helge Deller wrote:
>>>> Fix the elf loader to calculate a valid TASK_UNMAPPED_BASE address for all
>>>> 32-bit architectures, based on the GUEST_ADDR_MAX constant.
>>>>
>>>> Additionally modify the elf loader to load dynamic pie executables at
>>>> around:
>>>> ~ 0x5500000000  for 64-bit guest binaries on 64-bit host,
>>>> - 0x00300000    for 32-bit guest binaries on 64-bit host, and
>>>> - 0x00000000    for 32-bit guest binaries on 32-bit host.
>>>
>>> Why do you change guest addresses depending on the host?
>>
>> The addresses are guest-addresses.
>> A 32-bit guest PIE can't be loaded at e.g. 0x5500000000,
>> while a 64-bit guest PIE needs to be loaded at 0x5500000000.
>
> I mean, why do you use address 0x00000000 for 32-bit guest binaries on 32-bit host while you use address 0x00300000 on 64-bit host?

To keep the memory pressure for the 32-bit qemu binary minimal.
On 64-bit host we have the full 32-bit address space for the guest.

Helge
Akihiko Odaki Aug. 2, 2023, 9:58 a.m. UTC | #5
On 2023/08/02 18:34, Helge Deller wrote:
> On 8/2/23 10:44, Akihiko Odaki wrote:
>> On 2023/08/02 17:42, Helge Deller wrote:
>>> On 8/2/23 09:49, Akihiko Odaki wrote:
>>>> On 2023/08/02 8:27, Helge Deller wrote:
>>>>> Fix the elf loader to calculate a valid TASK_UNMAPPED_BASE address 
>>>>> for all
>>>>> 32-bit architectures, based on the GUEST_ADDR_MAX constant.
>>>>>
>>>>> Additionally modify the elf loader to load dynamic pie executables at
>>>>> around:
>>>>> ~ 0x5500000000  for 64-bit guest binaries on 64-bit host,
>>>>> - 0x00300000    for 32-bit guest binaries on 64-bit host, and
>>>>> - 0x00000000    for 32-bit guest binaries on 32-bit host.
>>>>
>>>> Why do you change guest addresses depending on the host?
>>>
>>> The addresses are guest-addresses.
>>> A 32-bit guest PIE can't be loaded at e.g. 0x5500000000,
>>> while a 64-bit guest PIE needs to be loaded at 0x5500000000.
>>
>> I mean, why do you use address 0x00000000 for 32-bit guest binaries on 
>> 32-bit host while you use address 0x00300000 on 64-bit host?
> 
> To keep the memory pressure for the 32-bit qemu binary minimal.
> On 64-bit host we have the full 32-bit address space for the guest.
> 
> Helge
> 

That makes sense. I'm worried that using 0x00000000 may break NULL 
checks on the guest though.

Regards,
Akihiko Odaki
Helge Deller Aug. 2, 2023, 10:35 a.m. UTC | #6
On 8/2/23 11:58, Akihiko Odaki wrote:
> On 2023/08/02 18:34, Helge Deller wrote:
>> On 8/2/23 10:44, Akihiko Odaki wrote:
>>> On 2023/08/02 17:42, Helge Deller wrote:
>>>> On 8/2/23 09:49, Akihiko Odaki wrote:
>>>>> On 2023/08/02 8:27, Helge Deller wrote:
>>>>>> Fix the elf loader to calculate a valid TASK_UNMAPPED_BASE address for all
>>>>>> 32-bit architectures, based on the GUEST_ADDR_MAX constant.
>>>>>>
>>>>>> Additionally modify the elf loader to load dynamic pie executables at
>>>>>> around:
>>>>>> ~ 0x5500000000  for 64-bit guest binaries on 64-bit host,
>>>>>> - 0x00300000    for 32-bit guest binaries on 64-bit host, and
>>>>>> - 0x00000000    for 32-bit guest binaries on 32-bit host.
>>>>>
>>>>> Why do you change guest addresses depending on the host?
>>>>
>>>> The addresses are guest-addresses.
>>>> A 32-bit guest PIE can't be loaded at e.g. 0x5500000000,
>>>> while a 64-bit guest PIE needs to be loaded at 0x5500000000.
>>>
>>> I mean, why do you use address 0x00000000 for 32-bit guest binaries on 32-bit host while you use address 0x00300000 on 64-bit host?
>>
>> To keep the memory pressure for the 32-bit qemu binary minimal.
>> On 64-bit host we have the full 32-bit address space for the guest.
>>
>> Helge
>>
>
> That makes sense. I'm worried that using 0x00000000 may break NULL checks on the guest though.

probably not, because 0 means to search any free space.
It's not fixed, it will be searched from there.

Helge
Richard Henderson Aug. 2, 2023, 6:36 p.m. UTC | #7
On 8/1/23 16:27, Helge Deller wrote:
> +/* where to map binaries? */
> +#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
> +# define TASK_UNMAPPED_BASE_PIE 0x5500000000
> +# define TASK_UNMAPPED_BASE	0x7000000000
> +#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
> +# define TASK_UNMAPPED_BASE_PIE	0x00300000
> +# define TASK_UNMAPPED_BASE	(GUEST_ADDR_MAX - 0x20000000 + 1)
> +#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
> +# define TASK_UNMAPPED_BASE_PIE	0x00000000
> +# define TASK_UNMAPPED_BASE	0x40000000
> +#endif

It would probably be easier to follow if you use the kernel's name: ELF_ET_DYN_BASE. 
Should be in linux-user/$GUEST/target_mmap.h with TASK_UNMAPPED_BASE, per my comment vs 
patch 7.


> +        /* do sanity checks on guest memory layout */
> +        if (mmap_next_start >= GUEST_ADDR_MAX) {
> +            mmap_next_start = GUEST_ADDR_MAX - 0x1000000000 + 1;
> +        }

What in the world is that random number?  It certainly won't compile on 32-bit host, and 
certainly isn't relevant to a 32-bit guest.

> +        if (TASK_UNMAPPED_BASE_PIE >= mmap_next_start) {
> +            fprintf(stderr, "Memory too small for PIE executables.\n");
> +            exit(EXIT_FAILURE);
> +        }

Really bad timing for this diagnostic.  What if a PIE executable isn't even being used?

Both TASK_UNMAPPED_BASE and ELF_ET_DYN_BASE could be computed in main (or a subroutine), 
when reserved_va is assigned.


r~
Helge Deller Aug. 2, 2023, 7:57 p.m. UTC | #8
On 8/2/23 20:36, Richard Henderson wrote:
> On 8/1/23 16:27, Helge Deller wrote:
>> +/* where to map binaries? */
>> +#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
>> +# define TASK_UNMAPPED_BASE_PIE 0x5500000000
>> +# define TASK_UNMAPPED_BASE    0x7000000000
>> +#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
>> +# define TASK_UNMAPPED_BASE_PIE    0x00300000
>> +# define TASK_UNMAPPED_BASE    (GUEST_ADDR_MAX - 0x20000000 + 1)
>> +#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
>> +# define TASK_UNMAPPED_BASE_PIE    0x00000000
>> +# define TASK_UNMAPPED_BASE    0x40000000
>> +#endif
>
> It would probably be easier to follow if you use the kernel's name:
> ELF_ET_DYN_BASE.

Agreed.

> Should be in linux-user/$GUEST/target_mmap.h with
> TASK_UNMAPPED_BASE, per my comment vs patch 7.

Maybe, but see my comments/answers there...

>> +        /* do sanity checks on guest memory layout */
>> +        if (mmap_next_start >= GUEST_ADDR_MAX) {
>> +            mmap_next_start = GUEST_ADDR_MAX - 0x1000000000 + 1;
>> +        }
>
> What in the world is that random number?

It's random. Idea is to keep enough space for shared libs below GUEST_ADDR_MAX.

> It certainly won't compile
> on 32-bit host, and certainly isn't relevant to a 32-bit guest.

That code will never be compiled/executed on 32-bit guests & hosts.
The defines for TASK_UNMAPPED_BASE take already care of that.
But I agree it's not directly visible (and probably not nice that way).

>> +        if (TASK_UNMAPPED_BASE_PIE >= mmap_next_start) {
>> +            fprintf(stderr, "Memory too small for PIE executables.\n");
>> +            exit(EXIT_FAILURE);
>> +        }
>
> Really bad timing for this diagnostic.  What if a PIE executable isn't even being used?
>
> Both TASK_UNMAPPED_BASE and ELF_ET_DYN_BASE could be computed in main (or a subroutine), when reserved_va is assigned.

Helge
diff mbox series

Patch

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 47a118e430..8f5a79b537 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -3021,6 +3021,7 @@  static void load_elf_image(const char *image_name, int image_fd,
     struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
     struct elf_phdr *phdr;
     abi_ulong load_addr, load_bias, loaddr, hiaddr, error;
+    unsigned long load_offset = 0;
     int i, retval, prot_exec;
     Error *err = NULL;
     bool is_main_executable;
@@ -3121,6 +3122,7 @@  static void load_elf_image(const char *image_name, int image_fd,
              * select guest_base.  In this case we pass a size.
              */
             probe_guest_base(image_name, 0, hiaddr - loaddr);
+            load_offset = TASK_UNMAPPED_BASE_PIE;
         }
     }

@@ -3138,7 +3140,7 @@  static void load_elf_image(const char *image_name, int image_fd,
      * In both cases, we will overwrite pages in this range with mappings
      * from the executable.
      */
-    load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
+    load_addr = target_mmap(loaddr + load_offset, (size_t)hiaddr - loaddr + 1, PROT_NONE,
                             MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
                             (is_main_executable ? MAP_FIXED : 0),
                             -1, 0);
@@ -3176,7 +3178,7 @@  static void load_elf_image(const char *image_name, int image_fd,
     info->start_data = -1;
     info->end_data = 0;
     /* possible start for brk is behind all sections of this ELF file. */
-    info->brk = TARGET_PAGE_ALIGN(hiaddr);
+    info->brk = TARGET_PAGE_ALIGN(load_offset + hiaddr);
     info->elf_flags = ehdr->e_flags;

     prot_exec = PROT_EXEC;
diff --git a/linux-user/loader.h b/linux-user/loader.h
index 59cbeacf24..3bbfc108eb 100644
--- a/linux-user/loader.h
+++ b/linux-user/loader.h
@@ -18,6 +18,18 @@ 
 #ifndef LINUX_USER_LOADER_H
 #define LINUX_USER_LOADER_H

+/* where to map binaries? */
+#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
+# define TASK_UNMAPPED_BASE_PIE 0x5500000000
+# define TASK_UNMAPPED_BASE	0x7000000000
+#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
+# define TASK_UNMAPPED_BASE_PIE	0x00300000
+# define TASK_UNMAPPED_BASE	(GUEST_ADDR_MAX - 0x20000000 + 1)
+#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
+# define TASK_UNMAPPED_BASE_PIE	0x00000000
+# define TASK_UNMAPPED_BASE	0x40000000
+#endif
+
 /*
  * Read a good amount of data initially, to hopefully get all the
  * program headers loaded.
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index c624feead0..3441198e21 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -23,6 +23,7 @@ 
 #include "user-internals.h"
 #include "user-mmap.h"
 #include "target_mman.h"
+#include "loader.h"

 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
 static __thread int mmap_lock_count;
@@ -295,23 +296,6 @@  static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
     return true;
 }

-#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
-#ifdef TARGET_AARCH64
-# define TASK_UNMAPPED_BASE  0x5500000000
-#else
-# define TASK_UNMAPPED_BASE  0x4000000000
-#endif
-#elif HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 32
-#ifdef TARGET_HPPA
-# define TASK_UNMAPPED_BASE  0xfa000000
-#else
-# define TASK_UNMAPPED_BASE  0xe0000000
-#endif
-#else /* HOST_LONG_BITS == 32 && TARGET_ABI_BITS == 32 */
-# define TASK_UNMAPPED_BASE  0x40000000
-#endif
-abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
-
 unsigned long last_brk;

 /*
@@ -344,6 +328,23 @@  abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
     abi_ulong addr;
     int wrapped, repeat;

+    static abi_ulong mmap_next_start;
+
+    /* initialize mmap_next_start if necessary */
+    if (!mmap_next_start) {
+        mmap_next_start = TASK_UNMAPPED_BASE;
+
+        /* do sanity checks on guest memory layout */
+        if (mmap_next_start >= GUEST_ADDR_MAX) {
+            mmap_next_start = GUEST_ADDR_MAX - 0x1000000000 + 1;
+        }
+
+        if (TASK_UNMAPPED_BASE_PIE >= mmap_next_start) {
+            fprintf(stderr, "Memory too small for PIE executables.\n");
+            exit(EXIT_FAILURE);
+        }
+    }
+
     align = MAX(align, qemu_host_page_size);

     /* If 'start' == 0, then a default start address is used. */