diff mbox series

[v9,1/3] hw/riscv: clear kernel_entry higher bits from load_elf_ram_sym()

Message ID 20230119213707.651533-2-dbarboza@ventanamicro.com
State New
Headers show
Series [v9,1/3] hw/riscv: clear kernel_entry higher bits from load_elf_ram_sym() | expand

Commit Message

Daniel Henrique Barboza Jan. 19, 2023, 9:37 p.m. UTC
load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit
QEMU guest happens to be running in a hypervisor that are using 64
bits to encode its address, kernel_entry can be padded with '1's
and create problems [1].

Use a translate_fn() callback to be called by load_elf_ram_sym() and
return only the 32 bits address if we're running a 32 bit CPU.

[1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html

Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Suggested-by: Bin Meng <bmeng.cn@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
---
 hw/riscv/boot.c            | 20 +++++++++++++++++++-
 hw/riscv/microchip_pfsoc.c |  3 ++-
 hw/riscv/opentitan.c       |  3 ++-
 hw/riscv/sifive_e.c        |  3 ++-
 hw/riscv/sifive_u.c        |  3 ++-
 hw/riscv/spike.c           |  3 ++-
 hw/riscv/virt.c            |  3 ++-
 include/hw/riscv/boot.h    |  1 +
 8 files changed, 32 insertions(+), 7 deletions(-)

Comments

Alistair Francis Jan. 23, 2023, 12:01 a.m. UTC | #1
On Fri, Jan 20, 2023 at 7:38 AM Daniel Henrique Barboza
<dbarboza@ventanamicro.com> wrote:
>
> load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit
> QEMU guest happens to be running in a hypervisor that are using 64
> bits to encode its address, kernel_entry can be padded with '1's
> and create problems [1].
>
> Use a translate_fn() callback to be called by load_elf_ram_sym() and
> return only the 32 bits address if we're running a 32 bit CPU.
>
> [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html
>
> Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Suggested-by: Bin Meng <bmeng.cn@gmail.com>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  hw/riscv/boot.c            | 20 +++++++++++++++++++-
>  hw/riscv/microchip_pfsoc.c |  3 ++-
>  hw/riscv/opentitan.c       |  3 ++-
>  hw/riscv/sifive_e.c        |  3 ++-
>  hw/riscv/sifive_u.c        |  3 ++-
>  hw/riscv/spike.c           |  3 ++-
>  hw/riscv/virt.c            |  3 ++-
>  include/hw/riscv/boot.h    |  1 +
>  8 files changed, 32 insertions(+), 7 deletions(-)
>
> diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> index 2594276223..46fc7adccf 100644
> --- a/hw/riscv/boot.c
> +++ b/hw/riscv/boot.c
> @@ -173,7 +173,24 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
>      exit(1);
>  }
>
> +static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
> +{
> +    RISCVHartArrayState *harts = opaque;
> +
> +    if (riscv_is_32bit(harts)) {
> +        /*
> +         * For 32 bit CPUs, kernel_load_base is sign-extended
> +         * (i.e. it can be padded with '1's) by load_elf().
> +         * Remove the sign extension by truncating to 32-bit.
> +         */
> +        return extract64(addr, 0, 32);
> +    }
> +
> +    return addr;
> +}
> +
>  target_ulong riscv_load_kernel(MachineState *machine,
> +                               RISCVHartArrayState *harts,
>                                 target_ulong kernel_start_addr,
>                                 symbol_fn_t sym_cb)
>  {
> @@ -189,7 +206,8 @@ target_ulong riscv_load_kernel(MachineState *machine,
>       * the (expected) load address load address. This allows kernels to have
>       * separate SBI and ELF entry points (used by FreeBSD, for example).
>       */
> -    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
> +    if (load_elf_ram_sym(kernel_filename, NULL,
> +                         translate_kernel_address, harts,
>                           NULL, &kernel_load_base, NULL, NULL, 0,
>                           EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
>          return kernel_load_base;
> diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
> index 82ae5e7023..bdefeb3cbb 100644
> --- a/hw/riscv/microchip_pfsoc.c
> +++ b/hw/riscv/microchip_pfsoc.c
> @@ -629,7 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> +                                         kernel_start_addr, NULL);
>
>          if (machine->initrd_filename) {
>              riscv_load_initrd(machine, kernel_entry);
> diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
> index 64d5d435b9..2731138c41 100644
> --- a/hw/riscv/opentitan.c
> +++ b/hw/riscv/opentitan.c
> @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine)
>      }
>
>      if (machine->kernel_filename) {
> -        riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL);
> +        riscv_load_kernel(machine, &s->soc.cpus,
> +                          memmap[IBEX_DEV_RAM].base, NULL);
>      }
>  }
>
> diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
> index 3e3f4b0088..1a7d381514 100644
> --- a/hw/riscv/sifive_e.c
> +++ b/hw/riscv/sifive_e.c
> @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine)
>                            memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
>
>      if (machine->kernel_filename) {
> -        riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL);
> +        riscv_load_kernel(machine, &s->soc.cpus,
> +                          memmap[SIFIVE_E_DEV_DTIM].base, NULL);
>      }
>  }
>
> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
> index 2fb6ee231f..83dfe09877 100644
> --- a/hw/riscv/sifive_u.c
> +++ b/hw/riscv/sifive_u.c
> @@ -598,7 +598,8 @@ static void sifive_u_machine_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> +                                         kernel_start_addr, NULL);
>
>          if (machine->initrd_filename) {
>              riscv_load_initrd(machine, kernel_entry);
> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
> index badc11ec43..2bcc50d90d 100644
> --- a/hw/riscv/spike.c
> +++ b/hw/riscv/spike.c
> @@ -305,7 +305,8 @@ static void spike_board_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> +                                         kernel_start_addr,
>                                           htif_symbol_callback);
>
>          if (machine->initrd_filename) {
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 4a11b4b010..ac173a6ed6 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -1274,7 +1274,8 @@ static void virt_machine_done(Notifier *notifier, void *data)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> +                                         kernel_start_addr, NULL);
>
>          if (machine->initrd_filename) {
>              riscv_load_initrd(machine, kernel_entry);
> diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
> index f94653a09b..105706bf25 100644
> --- a/include/hw/riscv/boot.h
> +++ b/include/hw/riscv/boot.h
> @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
>                                   hwaddr firmware_load_addr,
>                                   symbol_fn_t sym_cb);
>  target_ulong riscv_load_kernel(MachineState *machine,
> +                               RISCVHartArrayState *harts,
>                                 target_ulong firmware_end_addr,
>                                 symbol_fn_t sym_cb);
>  void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry);
> --
> 2.39.0
>
>
Alistair Francis Feb. 1, 2023, 12:16 a.m. UTC | #2
On Fri, Jan 20, 2023 at 7:38 AM Daniel Henrique Barboza
<dbarboza@ventanamicro.com> wrote:
>
> load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit
> QEMU guest happens to be running in a hypervisor that are using 64
> bits to encode its address, kernel_entry can be padded with '1's
> and create problems [1].
>
> Use a translate_fn() callback to be called by load_elf_ram_sym() and
> return only the 32 bits address if we're running a 32 bit CPU.
>
> [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html
>
> Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Suggested-by: Bin Meng <bmeng.cn@gmail.com>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
> ---
>  hw/riscv/boot.c            | 20 +++++++++++++++++++-
>  hw/riscv/microchip_pfsoc.c |  3 ++-
>  hw/riscv/opentitan.c       |  3 ++-
>  hw/riscv/sifive_e.c        |  3 ++-
>  hw/riscv/sifive_u.c        |  3 ++-
>  hw/riscv/spike.c           |  3 ++-
>  hw/riscv/virt.c            |  3 ++-
>  include/hw/riscv/boot.h    |  1 +
>  8 files changed, 32 insertions(+), 7 deletions(-)
>
> diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> index 2594276223..46fc7adccf 100644
> --- a/hw/riscv/boot.c
> +++ b/hw/riscv/boot.c
> @@ -173,7 +173,24 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
>      exit(1);
>  }
>
> +static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
> +{
> +    RISCVHartArrayState *harts = opaque;
> +
> +    if (riscv_is_32bit(harts)) {
> +        /*
> +         * For 32 bit CPUs, kernel_load_base is sign-extended
> +         * (i.e. it can be padded with '1's) by load_elf().
> +         * Remove the sign extension by truncating to 32-bit.
> +         */
> +        return extract64(addr, 0, 32);
> +    }
> +
> +    return addr;

So.... After all that, this doesn't actually mask pentry from
load_elf_ram_sym(), so it doesn't help.

> +}
> +
>  target_ulong riscv_load_kernel(MachineState *machine,
> +                               RISCVHartArrayState *harts,
>                                 target_ulong kernel_start_addr,
>                                 symbol_fn_t sym_cb)
>  {
> @@ -189,7 +206,8 @@ target_ulong riscv_load_kernel(MachineState *machine,
>       * the (expected) load address load address. This allows kernels to have
>       * separate SBI and ELF entry points (used by FreeBSD, for example).
>       */
> -    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
> +    if (load_elf_ram_sym(kernel_filename, NULL,
> +                         translate_kernel_address, harts,
>                           NULL, &kernel_load_base, NULL, NULL, 0,
>                           EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {

I think we just need to add the mask here

Alistair

>          return kernel_load_base;
> diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
> index 82ae5e7023..bdefeb3cbb 100644
> --- a/hw/riscv/microchip_pfsoc.c
> +++ b/hw/riscv/microchip_pfsoc.c
> @@ -629,7 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> +                                         kernel_start_addr, NULL);
>
>          if (machine->initrd_filename) {
>              riscv_load_initrd(machine, kernel_entry);
> diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
> index 64d5d435b9..2731138c41 100644
> --- a/hw/riscv/opentitan.c
> +++ b/hw/riscv/opentitan.c
> @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine)
>      }
>
>      if (machine->kernel_filename) {
> -        riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL);
> +        riscv_load_kernel(machine, &s->soc.cpus,
> +                          memmap[IBEX_DEV_RAM].base, NULL);
>      }
>  }
>
> diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
> index 3e3f4b0088..1a7d381514 100644
> --- a/hw/riscv/sifive_e.c
> +++ b/hw/riscv/sifive_e.c
> @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine)
>                            memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
>
>      if (machine->kernel_filename) {
> -        riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL);
> +        riscv_load_kernel(machine, &s->soc.cpus,
> +                          memmap[SIFIVE_E_DEV_DTIM].base, NULL);
>      }
>  }
>
> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
> index 2fb6ee231f..83dfe09877 100644
> --- a/hw/riscv/sifive_u.c
> +++ b/hw/riscv/sifive_u.c
> @@ -598,7 +598,8 @@ static void sifive_u_machine_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> +                                         kernel_start_addr, NULL);
>
>          if (machine->initrd_filename) {
>              riscv_load_initrd(machine, kernel_entry);
> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
> index badc11ec43..2bcc50d90d 100644
> --- a/hw/riscv/spike.c
> +++ b/hw/riscv/spike.c
> @@ -305,7 +305,8 @@ static void spike_board_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> +                                         kernel_start_addr,
>                                           htif_symbol_callback);
>
>          if (machine->initrd_filename) {
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 4a11b4b010..ac173a6ed6 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -1274,7 +1274,8 @@ static void virt_machine_done(Notifier *notifier, void *data)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> +                                         kernel_start_addr, NULL);
>
>          if (machine->initrd_filename) {
>              riscv_load_initrd(machine, kernel_entry);
> diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
> index f94653a09b..105706bf25 100644
> --- a/include/hw/riscv/boot.h
> +++ b/include/hw/riscv/boot.h
> @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
>                                   hwaddr firmware_load_addr,
>                                   symbol_fn_t sym_cb);
>  target_ulong riscv_load_kernel(MachineState *machine,
> +                               RISCVHartArrayState *harts,
>                                 target_ulong firmware_end_addr,
>                                 symbol_fn_t sym_cb);
>  void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry);
> --
> 2.39.0
>
>
Daniel Henrique Barboza Feb. 1, 2023, 1:48 p.m. UTC | #3
On 1/31/23 21:16, Alistair Francis wrote:
> On Fri, Jan 20, 2023 at 7:38 AM Daniel Henrique Barboza
> <dbarboza@ventanamicro.com> wrote:
>>
>> load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit
>> QEMU guest happens to be running in a hypervisor that are using 64
>> bits to encode its address, kernel_entry can be padded with '1's
>> and create problems [1].
>>
>> Use a translate_fn() callback to be called by load_elf_ram_sym() and
>> return only the 32 bits address if we're running a 32 bit CPU.
>>
>> [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html
>>
>> Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>> Suggested-by: Bin Meng <bmeng.cn@gmail.com>
>> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
>> ---
>>   hw/riscv/boot.c            | 20 +++++++++++++++++++-
>>   hw/riscv/microchip_pfsoc.c |  3 ++-
>>   hw/riscv/opentitan.c       |  3 ++-
>>   hw/riscv/sifive_e.c        |  3 ++-
>>   hw/riscv/sifive_u.c        |  3 ++-
>>   hw/riscv/spike.c           |  3 ++-
>>   hw/riscv/virt.c            |  3 ++-
>>   include/hw/riscv/boot.h    |  1 +
>>   8 files changed, 32 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
>> index 2594276223..46fc7adccf 100644
>> --- a/hw/riscv/boot.c
>> +++ b/hw/riscv/boot.c
>> @@ -173,7 +173,24 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
>>       exit(1);
>>   }
>>
>> +static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
>> +{
>> +    RISCVHartArrayState *harts = opaque;
>> +
>> +    if (riscv_is_32bit(harts)) {
>> +        /*
>> +         * For 32 bit CPUs, kernel_load_base is sign-extended
>> +         * (i.e. it can be padded with '1's) by load_elf().
>> +         * Remove the sign extension by truncating to 32-bit.
>> +         */
>> +        return extract64(addr, 0, 32);
>> +    }
>> +
>> +    return addr;
> 
> So.... After all that, this doesn't actually mask pentry from
> load_elf_ram_sym(), so it doesn't help.

Interesting. And it has to do with how load_elf() is handling it. All these
load_elf() functions will end up in either load_elf64() or load_elf32(), which
in turn will call glue(load_elf, SZ) from include/hw/elf_ops.h.

In that function, pentry is calculated right at the start:


     if (pentry)
         *pentry = (uint64_t)(elf_sword)ehdr.e_entry;


And then, down below, the translation function is used:

             if (translate_fn) {
                 addr = translate_fn(translate_opaque, ph->p_paddr);
                 glue(elf_reloc, SZ)(&ehdr, fd, must_swab,  translate_fn,
                                     translate_opaque, data, ph, elf_machine);
             } else {
                 addr = ph->p_paddr;
             }

And 'pentry' is only recalculated if we do NOT have a translate_fn:


             /* the entry pointer in the ELF header is a virtual
              * address, if the text segments paddr and vaddr differ
              * we need to adjust the entry */
             if (pentry && !translate_fn &&
                     ph->p_vaddr != ph->p_paddr &&
                     ehdr.e_entry >= ph->p_vaddr &&
                     ehdr.e_entry < ph->p_vaddr + ph->p_filesz &&
                     ph->p_flags & PF_X) {
                 *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr;
             }


I can't say whether this is an oversight or intended, but in the end it's a given
that pentry is not being affected by translate_fn() as we thought it would - meaning
that we're still passing the sign-extension along.

I'll mention this in the commit message for future reference.




> 
>> +}
>> +
>>   target_ulong riscv_load_kernel(MachineState *machine,
>> +                               RISCVHartArrayState *harts,
>>                                  target_ulong kernel_start_addr,
>>                                  symbol_fn_t sym_cb)
>>   {
>> @@ -189,7 +206,8 @@ target_ulong riscv_load_kernel(MachineState *machine,
>>        * the (expected) load address load address. This allows kernels to have
>>        * separate SBI and ELF entry points (used by FreeBSD, for example).
>>        */
>> -    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
>> +    if (load_elf_ram_sym(kernel_filename, NULL,
>> +                         translate_kernel_address, harts,
>>                            NULL, &kernel_load_base, NULL, NULL, 0,
>>                            EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
> 
> I think we just need to add the mask here


I saw in other machines that we can retrieve 'kernel_low' using the parameter
right after kernel_load_base. This worked for me to boot a 32 bit 'virt' machine
with -kernel:


-    if (load_elf_ram_sym(kernel_filename, NULL,
-                         translate_kernel_address, harts,
-                         NULL, &kernel_load_base, NULL, NULL, 0,
+    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, NULL,
+                         &kernel_load_base, &kernel_low, NULL, 0,
                           EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
          kernel_entry = kernel_load_base;
+
+        if (riscv_is_32bit(harts)) {
+            kernel_entry = kernel_low;
+        }


I don't have your failing use case here so it would be up for you to be 'adventurous'
and see if this would fix the problem you're seeing. If you'd rather get this over
with we can just mask it out and call it day.


Thanks,

Daniel



> 
> Alistair
> 
>>           return kernel_load_base;
>> diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
>> index 82ae5e7023..bdefeb3cbb 100644
>> --- a/hw/riscv/microchip_pfsoc.c
>> +++ b/hw/riscv/microchip_pfsoc.c
>> @@ -629,7 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine)
>>           kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
>>                                                            firmware_end_addr);
>>
>> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
>> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
>> +                                         kernel_start_addr, NULL);
>>
>>           if (machine->initrd_filename) {
>>               riscv_load_initrd(machine, kernel_entry);
>> diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
>> index 64d5d435b9..2731138c41 100644
>> --- a/hw/riscv/opentitan.c
>> +++ b/hw/riscv/opentitan.c
>> @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine)
>>       }
>>
>>       if (machine->kernel_filename) {
>> -        riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL);
>> +        riscv_load_kernel(machine, &s->soc.cpus,
>> +                          memmap[IBEX_DEV_RAM].base, NULL);
>>       }
>>   }
>>
>> diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
>> index 3e3f4b0088..1a7d381514 100644
>> --- a/hw/riscv/sifive_e.c
>> +++ b/hw/riscv/sifive_e.c
>> @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine)
>>                             memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
>>
>>       if (machine->kernel_filename) {
>> -        riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL);
>> +        riscv_load_kernel(machine, &s->soc.cpus,
>> +                          memmap[SIFIVE_E_DEV_DTIM].base, NULL);
>>       }
>>   }
>>
>> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
>> index 2fb6ee231f..83dfe09877 100644
>> --- a/hw/riscv/sifive_u.c
>> +++ b/hw/riscv/sifive_u.c
>> @@ -598,7 +598,8 @@ static void sifive_u_machine_init(MachineState *machine)
>>           kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
>>                                                            firmware_end_addr);
>>
>> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
>> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
>> +                                         kernel_start_addr, NULL);
>>
>>           if (machine->initrd_filename) {
>>               riscv_load_initrd(machine, kernel_entry);
>> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
>> index badc11ec43..2bcc50d90d 100644
>> --- a/hw/riscv/spike.c
>> +++ b/hw/riscv/spike.c
>> @@ -305,7 +305,8 @@ static void spike_board_init(MachineState *machine)
>>           kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>>                                                            firmware_end_addr);
>>
>> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
>> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
>> +                                         kernel_start_addr,
>>                                            htif_symbol_callback);
>>
>>           if (machine->initrd_filename) {
>> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
>> index 4a11b4b010..ac173a6ed6 100644
>> --- a/hw/riscv/virt.c
>> +++ b/hw/riscv/virt.c
>> @@ -1274,7 +1274,8 @@ static void virt_machine_done(Notifier *notifier, void *data)
>>           kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>>                                                            firmware_end_addr);
>>
>> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
>> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
>> +                                         kernel_start_addr, NULL);
>>
>>           if (machine->initrd_filename) {
>>               riscv_load_initrd(machine, kernel_entry);
>> diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
>> index f94653a09b..105706bf25 100644
>> --- a/include/hw/riscv/boot.h
>> +++ b/include/hw/riscv/boot.h
>> @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
>>                                    hwaddr firmware_load_addr,
>>                                    symbol_fn_t sym_cb);
>>   target_ulong riscv_load_kernel(MachineState *machine,
>> +                               RISCVHartArrayState *harts,
>>                                  target_ulong firmware_end_addr,
>>                                  symbol_fn_t sym_cb);
>>   void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry);
>> --
>> 2.39.0
>>
>>
Alistair Francis Feb. 2, 2023, 12:28 a.m. UTC | #4
On Wed, Feb 1, 2023 at 11:48 PM Daniel Henrique Barboza
<dbarboza@ventanamicro.com> wrote:
>
>
>
> On 1/31/23 21:16, Alistair Francis wrote:
> > On Fri, Jan 20, 2023 at 7:38 AM Daniel Henrique Barboza
> > <dbarboza@ventanamicro.com> wrote:
> >>
> >> load_elf_ram_sym() will sign-extend 32 bit addresses. If a 32 bit
> >> QEMU guest happens to be running in a hypervisor that are using 64
> >> bits to encode its address, kernel_entry can be padded with '1's
> >> and create problems [1].
> >>
> >> Use a translate_fn() callback to be called by load_elf_ram_sym() and
> >> return only the 32 bits address if we're running a 32 bit CPU.
> >>
> >> [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html
> >>
> >> Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> >> Suggested-by: Bin Meng <bmeng.cn@gmail.com>
> >> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> >> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
> >> ---
> >>   hw/riscv/boot.c            | 20 +++++++++++++++++++-
> >>   hw/riscv/microchip_pfsoc.c |  3 ++-
> >>   hw/riscv/opentitan.c       |  3 ++-
> >>   hw/riscv/sifive_e.c        |  3 ++-
> >>   hw/riscv/sifive_u.c        |  3 ++-
> >>   hw/riscv/spike.c           |  3 ++-
> >>   hw/riscv/virt.c            |  3 ++-
> >>   include/hw/riscv/boot.h    |  1 +
> >>   8 files changed, 32 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> >> index 2594276223..46fc7adccf 100644
> >> --- a/hw/riscv/boot.c
> >> +++ b/hw/riscv/boot.c
> >> @@ -173,7 +173,24 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
> >>       exit(1);
> >>   }
> >>
> >> +static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
> >> +{
> >> +    RISCVHartArrayState *harts = opaque;
> >> +
> >> +    if (riscv_is_32bit(harts)) {
> >> +        /*
> >> +         * For 32 bit CPUs, kernel_load_base is sign-extended
> >> +         * (i.e. it can be padded with '1's) by load_elf().
> >> +         * Remove the sign extension by truncating to 32-bit.
> >> +         */
> >> +        return extract64(addr, 0, 32);
> >> +    }
> >> +
> >> +    return addr;
> >
> > So.... After all that, this doesn't actually mask pentry from
> > load_elf_ram_sym(), so it doesn't help.
>
> Interesting. And it has to do with how load_elf() is handling it. All these
> load_elf() functions will end up in either load_elf64() or load_elf32(), which
> in turn will call glue(load_elf, SZ) from include/hw/elf_ops.h.
>
> In that function, pentry is calculated right at the start:
>
>
>      if (pentry)
>          *pentry = (uint64_t)(elf_sword)ehdr.e_entry;
>
>
> And then, down below, the translation function is used:
>
>              if (translate_fn) {
>                  addr = translate_fn(translate_opaque, ph->p_paddr);
>                  glue(elf_reloc, SZ)(&ehdr, fd, must_swab,  translate_fn,
>                                      translate_opaque, data, ph, elf_machine);
>              } else {
>                  addr = ph->p_paddr;
>              }
>
> And 'pentry' is only recalculated if we do NOT have a translate_fn:
>
>
>              /* the entry pointer in the ELF header is a virtual
>               * address, if the text segments paddr and vaddr differ
>               * we need to adjust the entry */
>              if (pentry && !translate_fn &&
>                      ph->p_vaddr != ph->p_paddr &&
>                      ehdr.e_entry >= ph->p_vaddr &&
>                      ehdr.e_entry < ph->p_vaddr + ph->p_filesz &&
>                      ph->p_flags & PF_X) {
>                  *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr;
>              }
>
>
> I can't say whether this is an oversight or intended, but in the end it's a given
> that pentry is not being affected by translate_fn() as we thought it would - meaning
> that we're still passing the sign-extension along.
>
> I'll mention this in the commit message for future reference.
>
>
>
>
> >
> >> +}
> >> +
> >>   target_ulong riscv_load_kernel(MachineState *machine,
> >> +                               RISCVHartArrayState *harts,
> >>                                  target_ulong kernel_start_addr,
> >>                                  symbol_fn_t sym_cb)
> >>   {
> >> @@ -189,7 +206,8 @@ target_ulong riscv_load_kernel(MachineState *machine,
> >>        * the (expected) load address load address. This allows kernels to have
> >>        * separate SBI and ELF entry points (used by FreeBSD, for example).
> >>        */
> >> -    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
> >> +    if (load_elf_ram_sym(kernel_filename, NULL,
> >> +                         translate_kernel_address, harts,
> >>                            NULL, &kernel_load_base, NULL, NULL, 0,
> >>                            EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
> >
> > I think we just need to add the mask here
>
>
> I saw in other machines that we can retrieve 'kernel_low' using the parameter
> right after kernel_load_base. This worked for me to boot a 32 bit 'virt' machine
> with -kernel:
>
>
> -    if (load_elf_ram_sym(kernel_filename, NULL,
> -                         translate_kernel_address, harts,
> -                         NULL, &kernel_load_base, NULL, NULL, 0,
> +    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, NULL,
> +                         &kernel_load_base, &kernel_low, NULL, 0,
>                            EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
>           kernel_entry = kernel_load_base;
> +
> +        if (riscv_is_32bit(harts)) {
> +            kernel_entry = kernel_low;
> +        }

That looks good to me, I expect that would work

Alistair

>
>
> I don't have your failing use case here so it would be up for you to be 'adventurous'
> and see if this would fix the problem you're seeing. If you'd rather get this over
> with we can just mask it out and call it day.
>
>
> Thanks,
>
> Daniel
>
>
>
> >
> > Alistair
> >
> >>           return kernel_load_base;
> >> diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
> >> index 82ae5e7023..bdefeb3cbb 100644
> >> --- a/hw/riscv/microchip_pfsoc.c
> >> +++ b/hw/riscv/microchip_pfsoc.c
> >> @@ -629,7 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine)
> >>           kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
> >>                                                            firmware_end_addr);
> >>
> >> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> >> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> >> +                                         kernel_start_addr, NULL);
> >>
> >>           if (machine->initrd_filename) {
> >>               riscv_load_initrd(machine, kernel_entry);
> >> diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
> >> index 64d5d435b9..2731138c41 100644
> >> --- a/hw/riscv/opentitan.c
> >> +++ b/hw/riscv/opentitan.c
> >> @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine)
> >>       }
> >>
> >>       if (machine->kernel_filename) {
> >> -        riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL);
> >> +        riscv_load_kernel(machine, &s->soc.cpus,
> >> +                          memmap[IBEX_DEV_RAM].base, NULL);
> >>       }
> >>   }
> >>
> >> diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
> >> index 3e3f4b0088..1a7d381514 100644
> >> --- a/hw/riscv/sifive_e.c
> >> +++ b/hw/riscv/sifive_e.c
> >> @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine)
> >>                             memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
> >>
> >>       if (machine->kernel_filename) {
> >> -        riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL);
> >> +        riscv_load_kernel(machine, &s->soc.cpus,
> >> +                          memmap[SIFIVE_E_DEV_DTIM].base, NULL);
> >>       }
> >>   }
> >>
> >> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
> >> index 2fb6ee231f..83dfe09877 100644
> >> --- a/hw/riscv/sifive_u.c
> >> +++ b/hw/riscv/sifive_u.c
> >> @@ -598,7 +598,8 @@ static void sifive_u_machine_init(MachineState *machine)
> >>           kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
> >>                                                            firmware_end_addr);
> >>
> >> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> >> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> >> +                                         kernel_start_addr, NULL);
> >>
> >>           if (machine->initrd_filename) {
> >>               riscv_load_initrd(machine, kernel_entry);
> >> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
> >> index badc11ec43..2bcc50d90d 100644
> >> --- a/hw/riscv/spike.c
> >> +++ b/hw/riscv/spike.c
> >> @@ -305,7 +305,8 @@ static void spike_board_init(MachineState *machine)
> >>           kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
> >>                                                            firmware_end_addr);
> >>
> >> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> >> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> >> +                                         kernel_start_addr,
> >>                                            htif_symbol_callback);
> >>
> >>           if (machine->initrd_filename) {
> >> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> >> index 4a11b4b010..ac173a6ed6 100644
> >> --- a/hw/riscv/virt.c
> >> +++ b/hw/riscv/virt.c
> >> @@ -1274,7 +1274,8 @@ static void virt_machine_done(Notifier *notifier, void *data)
> >>           kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
> >>                                                            firmware_end_addr);
> >>
> >> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
> >> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> >> +                                         kernel_start_addr, NULL);
> >>
> >>           if (machine->initrd_filename) {
> >>               riscv_load_initrd(machine, kernel_entry);
> >> diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
> >> index f94653a09b..105706bf25 100644
> >> --- a/include/hw/riscv/boot.h
> >> +++ b/include/hw/riscv/boot.h
> >> @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
> >>                                    hwaddr firmware_load_addr,
> >>                                    symbol_fn_t sym_cb);
> >>   target_ulong riscv_load_kernel(MachineState *machine,
> >> +                               RISCVHartArrayState *harts,
> >>                                  target_ulong firmware_end_addr,
> >>                                  symbol_fn_t sym_cb);
> >>   void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry);
> >> --
> >> 2.39.0
> >>
> >>
diff mbox series

Patch

diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 2594276223..46fc7adccf 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -173,7 +173,24 @@  target_ulong riscv_load_firmware(const char *firmware_filename,
     exit(1);
 }
 
+static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
+{
+    RISCVHartArrayState *harts = opaque;
+
+    if (riscv_is_32bit(harts)) {
+        /*
+         * For 32 bit CPUs, kernel_load_base is sign-extended
+         * (i.e. it can be padded with '1's) by load_elf().
+         * Remove the sign extension by truncating to 32-bit.
+         */
+        return extract64(addr, 0, 32);
+    }
+
+    return addr;
+}
+
 target_ulong riscv_load_kernel(MachineState *machine,
+                               RISCVHartArrayState *harts,
                                target_ulong kernel_start_addr,
                                symbol_fn_t sym_cb)
 {
@@ -189,7 +206,8 @@  target_ulong riscv_load_kernel(MachineState *machine,
      * the (expected) load address load address. This allows kernels to have
      * separate SBI and ELF entry points (used by FreeBSD, for example).
      */
-    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
+    if (load_elf_ram_sym(kernel_filename, NULL,
+                         translate_kernel_address, harts,
                          NULL, &kernel_load_base, NULL, NULL, 0,
                          EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
         return kernel_load_base;
diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
index 82ae5e7023..bdefeb3cbb 100644
--- a/hw/riscv/microchip_pfsoc.c
+++ b/hw/riscv/microchip_pfsoc.c
@@ -629,7 +629,8 @@  static void microchip_icicle_kit_machine_init(MachineState *machine)
         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
                                                          firmware_end_addr);
 
-        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
+        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
+                                         kernel_start_addr, NULL);
 
         if (machine->initrd_filename) {
             riscv_load_initrd(machine, kernel_entry);
diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
index 64d5d435b9..2731138c41 100644
--- a/hw/riscv/opentitan.c
+++ b/hw/riscv/opentitan.c
@@ -101,7 +101,8 @@  static void opentitan_board_init(MachineState *machine)
     }
 
     if (machine->kernel_filename) {
-        riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, NULL);
+        riscv_load_kernel(machine, &s->soc.cpus,
+                          memmap[IBEX_DEV_RAM].base, NULL);
     }
 }
 
diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
index 3e3f4b0088..1a7d381514 100644
--- a/hw/riscv/sifive_e.c
+++ b/hw/riscv/sifive_e.c
@@ -114,7 +114,8 @@  static void sifive_e_machine_init(MachineState *machine)
                           memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
 
     if (machine->kernel_filename) {
-        riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base, NULL);
+        riscv_load_kernel(machine, &s->soc.cpus,
+                          memmap[SIFIVE_E_DEV_DTIM].base, NULL);
     }
 }
 
diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index 2fb6ee231f..83dfe09877 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -598,7 +598,8 @@  static void sifive_u_machine_init(MachineState *machine)
         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
                                                          firmware_end_addr);
 
-        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
+        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
+                                         kernel_start_addr, NULL);
 
         if (machine->initrd_filename) {
             riscv_load_initrd(machine, kernel_entry);
diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index badc11ec43..2bcc50d90d 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -305,7 +305,8 @@  static void spike_board_init(MachineState *machine)
         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
                                                          firmware_end_addr);
 
-        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
+        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
+                                         kernel_start_addr,
                                          htif_symbol_callback);
 
         if (machine->initrd_filename) {
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 4a11b4b010..ac173a6ed6 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1274,7 +1274,8 @@  static void virt_machine_done(Notifier *notifier, void *data)
         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
                                                          firmware_end_addr);
 
-        kernel_entry = riscv_load_kernel(machine, kernel_start_addr, NULL);
+        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
+                                         kernel_start_addr, NULL);
 
         if (machine->initrd_filename) {
             riscv_load_initrd(machine, kernel_entry);
diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
index f94653a09b..105706bf25 100644
--- a/include/hw/riscv/boot.h
+++ b/include/hw/riscv/boot.h
@@ -44,6 +44,7 @@  target_ulong riscv_load_firmware(const char *firmware_filename,
                                  hwaddr firmware_load_addr,
                                  symbol_fn_t sym_cb);
 target_ulong riscv_load_kernel(MachineState *machine,
+                               RISCVHartArrayState *harts,
                                target_ulong firmware_end_addr,
                                symbol_fn_t sym_cb);
 void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry);