diff mbox

disas: Fix printing of addresses in disassembly

Message ID 1340636155-26426-1-git-send-email-peter.maydell@linaro.org
State New
Headers show

Commit Message

Peter Maydell June 25, 2012, 2:55 p.m. UTC
In our disassembly code, the bfd_vma type is always 64 bits,
even if the target's virtual address width is only 32 bits. This
means that when we print out addresses we need to truncate them
to 32 bits, to avoid odd output which has incorrectly sign-extended
a value to 64 bits, for instance this ARM example:
    0x80479a60:  e59f4088     ldr  r4, [pc, #136]  ; 0xffffffff80479a4f

(It would also be possible to truncate before passing the address
to info->print_address_func(), but truncating in the final print
function is the same approach that binutils takes to this problem.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 disas.c |   19 +++++++++++++++++++
 1 files changed, 19 insertions(+), 0 deletions(-)

Comments

Peter Maydell July 9, 2012, 10:27 a.m. UTC | #1
Ping? [patchwork url http://patchwork.ozlabs.org/patch/167132/]

-- PMM

On 25 June 2012 15:55, Peter Maydell <peter.maydell@linaro.org> wrote:
> In our disassembly code, the bfd_vma type is always 64 bits,
> even if the target's virtual address width is only 32 bits. This
> means that when we print out addresses we need to truncate them
> to 32 bits, to avoid odd output which has incorrectly sign-extended
> a value to 64 bits, for instance this ARM example:
>     0x80479a60:  e59f4088     ldr  r4, [pc, #136]  ; 0xffffffff80479a4f
>
> (It would also be possible to truncate before passing the address
> to info->print_address_func(), but truncating in the final print
> function is the same approach that binutils takes to this problem.)
>
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  disas.c |   19 +++++++++++++++++++
>  1 files changed, 19 insertions(+), 0 deletions(-)
>
> diff --git a/disas.c b/disas.c
> index 93d8d30..7b2acc9 100644
> --- a/disas.c
> +++ b/disas.c
> @@ -64,6 +64,22 @@ generic_print_address (bfd_vma addr, struct disassemble_info *info)
>      (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
>  }
>
> +/* Print address in hex, truncated to the width of a target virtual address. */
> +static void
> +generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
> +{
> +    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
> +    generic_print_address(addr & mask, info);
> +}
> +
> +/* Print address in hex, truncated to the width of a host virtual address. */
> +static void
> +generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
> +{
> +    uint64_t mask = ~0ULL >> (64 - (sizeof(void *) * 8));
> +    generic_print_address(addr & mask, info);
> +}
> +
>  /* Just return the given address.  */
>
>  int
> @@ -154,6 +170,7 @@ void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
>      disasm_info.read_memory_func = target_read_memory;
>      disasm_info.buffer_vma = code;
>      disasm_info.buffer_length = size;
> +    disasm_info.print_address_func = generic_print_target_address;
>
>  #ifdef TARGET_WORDS_BIGENDIAN
>      disasm_info.endian = BFD_ENDIAN_BIG;
> @@ -274,6 +291,7 @@ void disas(FILE *out, void *code, unsigned long size)
>      int (*print_insn)(bfd_vma pc, disassemble_info *info);
>
>      INIT_DISASSEMBLE_INFO(disasm_info, out, fprintf);
> +    disasm_info.print_address_func = generic_print_host_address;
>
>      disasm_info.buffer = code;
>      disasm_info.buffer_vma = (uintptr_t)code;
> @@ -386,6 +404,7 @@ void monitor_disas(Monitor *mon, CPUArchState *env,
>      monitor_disas_env = env;
>      monitor_disas_is_physical = is_physical;
>      disasm_info.read_memory_func = monitor_read_memory;
> +    disasm_info.print_address_func = generic_print_target_address;
>
>      disasm_info.buffer_vma = pc;
>
> --
> 1.7.1
>
>
Andreas Färber July 9, 2012, 12:45 p.m. UTC | #2
Am 09.07.2012 12:27, schrieb Peter Maydell:
> Ping? [patchwork url http://patchwork.ozlabs.org/patch/167132/]
> 
> -- PMM
> 
> On 25 June 2012 15:55, Peter Maydell <peter.maydell@linaro.org> wrote:
>> In our disassembly code, the bfd_vma type is always 64 bits,
>> even if the target's virtual address width is only 32 bits. This
>> means that when we print out addresses we need to truncate them
>> to 32 bits, to avoid odd output which has incorrectly sign-extended
>> a value to 64 bits, for instance this ARM example:
>>     0x80479a60:  e59f4088     ldr  r4, [pc, #136]  ; 0xffffffff80479a4f
>>
>> (It would also be possible to truncate before passing the address
>> to info->print_address_func(), but truncating in the final print
>> function is the same approach that binutils takes to this problem.)

Is this bug fixed in binutils and didn't make it into QEMU due to GPLv3?
Or is this in QEMU glue code?

Andreas

>>
>> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
>> ---
>>  disas.c |   19 +++++++++++++++++++
>>  1 files changed, 19 insertions(+), 0 deletions(-)
>>
>> diff --git a/disas.c b/disas.c
>> index 93d8d30..7b2acc9 100644
>> --- a/disas.c
>> +++ b/disas.c
>> @@ -64,6 +64,22 @@ generic_print_address (bfd_vma addr, struct disassemble_info *info)
>>      (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
>>  }
>>
>> +/* Print address in hex, truncated to the width of a target virtual address. */
>> +static void
>> +generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
>> +{
>> +    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
>> +    generic_print_address(addr & mask, info);
>> +}
>> +
>> +/* Print address in hex, truncated to the width of a host virtual address. */
>> +static void
>> +generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
>> +{
>> +    uint64_t mask = ~0ULL >> (64 - (sizeof(void *) * 8));
>> +    generic_print_address(addr & mask, info);
>> +}
>> +
>>  /* Just return the given address.  */
>>
>>  int
>> @@ -154,6 +170,7 @@ void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
>>      disasm_info.read_memory_func = target_read_memory;
>>      disasm_info.buffer_vma = code;
>>      disasm_info.buffer_length = size;
>> +    disasm_info.print_address_func = generic_print_target_address;
>>
>>  #ifdef TARGET_WORDS_BIGENDIAN
>>      disasm_info.endian = BFD_ENDIAN_BIG;
>> @@ -274,6 +291,7 @@ void disas(FILE *out, void *code, unsigned long size)
>>      int (*print_insn)(bfd_vma pc, disassemble_info *info);
>>
>>      INIT_DISASSEMBLE_INFO(disasm_info, out, fprintf);
>> +    disasm_info.print_address_func = generic_print_host_address;
>>
>>      disasm_info.buffer = code;
>>      disasm_info.buffer_vma = (uintptr_t)code;
>> @@ -386,6 +404,7 @@ void monitor_disas(Monitor *mon, CPUArchState *env,
>>      monitor_disas_env = env;
>>      monitor_disas_is_physical = is_physical;
>>      disasm_info.read_memory_func = monitor_read_memory;
>> +    disasm_info.print_address_func = generic_print_target_address;
>>
>>      disasm_info.buffer_vma = pc;
>>
>> --
>> 1.7.1
>>
>>
>
Peter Maydell July 9, 2012, 12:59 p.m. UTC | #3
On 9 July 2012 13:45, Andreas Färber <afaerber@suse.de> wrote:
> Am 09.07.2012 12:27, schrieb Peter Maydell:
>> On 25 June 2012 15:55, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> In our disassembly code, the bfd_vma type is always 64 bits,
>>> even if the target's virtual address width is only 32 bits. This
>>> means that when we print out addresses we need to truncate them
>>> to 32 bits, to avoid odd output which has incorrectly sign-extended
>>> a value to 64 bits, for instance this ARM example:
>>>     0x80479a60:  e59f4088     ldr  r4, [pc, #136]  ; 0xffffffff80479a4f
>>>
>>> (It would also be possible to truncate before passing the address
>>> to info->print_address_func(), but truncating in the final print
>>> function is the same approach that binutils takes to this problem.)
>
> Is this bug fixed in binutils and didn't make it into QEMU due to GPLv3?
> Or is this in QEMU glue code?

In binutils it is handled in the specific implementation of the
"print a target address" callback function in objdump. That's
not part of the code we have borrowed from binutils; the functions
changed by this patch are all QEMU glue code AFAIK.

-- PMM
Andreas Färber July 9, 2012, 1:19 p.m. UTC | #4
Am 25.06.2012 16:55, schrieb Peter Maydell:
> In our disassembly code, the bfd_vma type is always 64 bits,
> even if the target's virtual address width is only 32 bits. This
> means that when we print out addresses we need to truncate them
> to 32 bits, to avoid odd output which has incorrectly sign-extended
> a value to 64 bits, for instance this ARM example:
>     0x80479a60:  e59f4088     ldr  r4, [pc, #136]  ; 0xffffffff80479a4f
> 
> (It would also be possible to truncate before passing the address
> to info->print_address_func(), but truncating in the final print
> function is the same approach that binutils takes to this problem.)
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  disas.c |   19 +++++++++++++++++++
>  1 files changed, 19 insertions(+), 0 deletions(-)
> 
> diff --git a/disas.c b/disas.c
> index 93d8d30..7b2acc9 100644
> --- a/disas.c
> +++ b/disas.c
> @@ -64,6 +64,22 @@ generic_print_address (bfd_vma addr, struct disassemble_info *info)
>      (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
>  }
>  
> +/* Print address in hex, truncated to the width of a target virtual address. */
> +static void
> +generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
> +{
> +    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
> +    generic_print_address(addr & mask, info);
> +}
> +
> +/* Print address in hex, truncated to the width of a host virtual address. */
> +static void
> +generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
> +{
> +    uint64_t mask = ~0ULL >> (64 - (sizeof(void *) * 8));
> +    generic_print_address(addr & mask, info);
> +}
> +
>  /* Just return the given address.  */
>  
>  int

As usual the inversion and subtracted shift are a bit confusing at
first, but the algorithm looks okay.

I wonder if TARGET_VIRT_ADDR_SPACE_BITS is the correct factor to use
here though? Might sizeof(target_phys_addr_t) * 8 be safer? I'm thinking
of the possibility of having an alias in the 64-bit address space point
into the actual 36/48/... virtual address space. I have a ppc64 ld
instruction in mind, for which a full 64-bit register would be set up
that could not fully be represented in the virtual address space.

But maybe I'm misunderstanding what exactly these functions are being
assigned for below...

Andreas

> @@ -154,6 +170,7 @@ void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
>      disasm_info.read_memory_func = target_read_memory;
>      disasm_info.buffer_vma = code;
>      disasm_info.buffer_length = size;
> +    disasm_info.print_address_func = generic_print_target_address;
>  
>  #ifdef TARGET_WORDS_BIGENDIAN
>      disasm_info.endian = BFD_ENDIAN_BIG;
> @@ -274,6 +291,7 @@ void disas(FILE *out, void *code, unsigned long size)
>      int (*print_insn)(bfd_vma pc, disassemble_info *info);
>  
>      INIT_DISASSEMBLE_INFO(disasm_info, out, fprintf);
> +    disasm_info.print_address_func = generic_print_host_address;
>  
>      disasm_info.buffer = code;
>      disasm_info.buffer_vma = (uintptr_t)code;
> @@ -386,6 +404,7 @@ void monitor_disas(Monitor *mon, CPUArchState *env,
>      monitor_disas_env = env;
>      monitor_disas_is_physical = is_physical;
>      disasm_info.read_memory_func = monitor_read_memory;
> +    disasm_info.print_address_func = generic_print_target_address;
>  
>      disasm_info.buffer_vma = pc;
>
Peter Maydell July 9, 2012, 1:26 p.m. UTC | #5
On 9 July 2012 14:19, Andreas Färber <afaerber@suse.de> wrote:
> Am 25.06.2012 16:55, schrieb Peter Maydell:
>> In our disassembly code, the bfd_vma type is always 64 bits,
>> even if the target's virtual address width is only 32 bits. This
>> means that when we print out addresses we need to truncate them
>> to 32 bits, to avoid odd output which has incorrectly sign-extended
>> a value to 64 bits, for instance this ARM example:
>>     0x80479a60:  e59f4088     ldr  r4, [pc, #136]  ; 0xffffffff80479a4f
>>
>> (It would also be possible to truncate before passing the address
>> to info->print_address_func(), but truncating in the final print
>> function is the same approach that binutils takes to this problem.)

>> +/* Print address in hex, truncated to the width of a target virtual address. */
>> +static void
>> +generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
>> +{
>> +    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
>> +    generic_print_address(addr & mask, info);
>> +}

> I wonder if TARGET_VIRT_ADDR_SPACE_BITS is the correct factor to use
> here though? Might sizeof(target_phys_addr_t) * 8 be safer?

That will give the wrong answer for ARM (when my LPAE patchset lands):
ARM virtual addresses are 32 bits but sizeof(target_phys_addr_t) * 8
will be 64.

> I'm thinking
> of the possibility of having an alias in the 64-bit address space point
> into the actual 36/48/... virtual address space. I have a ppc64 ld
> instruction in mind, for which a full 64-bit register would be set up
> that could not fully be represented in the virtual address space.

It would be helpful to check how the ppc disassembler handles that
ld insn. It may well either (a) not print the resulting address at
all or (b) print it itself. However, if the resulting actual address
is a virtual address space then the right thing to print would be
the truncated version, I think, assuming that is what the hardware will
actually do the load from.

-- PMM
Andreas Färber July 9, 2012, 2:37 p.m. UTC | #6
Am 09.07.2012 15:26, schrieb Peter Maydell:
> On 9 July 2012 14:19, Andreas Färber <afaerber@suse.de> wrote:
>> Am 25.06.2012 16:55, schrieb Peter Maydell:
>>> In our disassembly code, the bfd_vma type is always 64 bits,
>>> even if the target's virtual address width is only 32 bits. This
>>> means that when we print out addresses we need to truncate them
>>> to 32 bits, to avoid odd output which has incorrectly sign-extended
>>> a value to 64 bits, for instance this ARM example:
>>>     0x80479a60:  e59f4088     ldr  r4, [pc, #136]  ; 0xffffffff80479a4f
>>>
>>> (It would also be possible to truncate before passing the address
>>> to info->print_address_func(), but truncating in the final print
>>> function is the same approach that binutils takes to this problem.)
> 
>>> +/* Print address in hex, truncated to the width of a target virtual address. */
>>> +static void
>>> +generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
>>> +{
>>> +    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
>>> +    generic_print_address(addr & mask, info);
>>> +}
> 
>> I wonder if TARGET_VIRT_ADDR_SPACE_BITS is the correct factor to use
>> here though? Might sizeof(target_phys_addr_t) * 8 be safer?
> 
> That will give the wrong answer for ARM (when my LPAE patchset lands):
> ARM virtual addresses are 32 bits but sizeof(target_phys_addr_t) * 8
> will be 64.

Sorry, I'm mixing things up...

      VAS / PAS
arm    32 / 40
i386   32 / 36
x86_64 47 / 52
ppc    32 / 36
ppc64  64 / 62

It's the physical address space where ppc64 is the oddball, so:

Reviewed-by: Andreas Färber <afaerber@suse.de>

>> I'm thinking
>> of the possibility of having an alias in the 64-bit address space point
>> into the actual 36/48/... virtual address space. I have a ppc64 ld
>> instruction in mind, for which a full 64-bit register would be set up
>> that could not fully be represented in the virtual address space.
> 
> It would be helpful to check how the ppc disassembler handles that
> ld insn. It may well either (a) not print the resulting address at
> all or (b) print it itself. However, if the resulting actual address
> is a virtual address space then the right thing to print would be
> the truncated version, I think, assuming that is what the hardware will
> actually do the load from.

I made a thinko: The disassembler would only show the register numbers
for the ld instruction, and the register would be loaded by up to five
instructions with 16-bit immediate (shifted) loads. So there would be no
problem with the operands.
The value printing at runtime is handled by the gdb stub instead.

Andreas
Blue Swirl July 14, 2012, 12:19 p.m. UTC | #7
On Mon, Jul 9, 2012 at 10:27 AM, Peter Maydell <peter.maydell@linaro.org> wrote:
> Ping? [patchwork url http://patchwork.ozlabs.org/patch/167132/]

Thanks, applied.

>
> -- PMM
>
> On 25 June 2012 15:55, Peter Maydell <peter.maydell@linaro.org> wrote:
>> In our disassembly code, the bfd_vma type is always 64 bits,
>> even if the target's virtual address width is only 32 bits. This
>> means that when we print out addresses we need to truncate them
>> to 32 bits, to avoid odd output which has incorrectly sign-extended
>> a value to 64 bits, for instance this ARM example:
>>     0x80479a60:  e59f4088     ldr  r4, [pc, #136]  ; 0xffffffff80479a4f
>>
>> (It would also be possible to truncate before passing the address
>> to info->print_address_func(), but truncating in the final print
>> function is the same approach that binutils takes to this problem.)
>>
>> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
>> ---
>>  disas.c |   19 +++++++++++++++++++
>>  1 files changed, 19 insertions(+), 0 deletions(-)
>>
>> diff --git a/disas.c b/disas.c
>> index 93d8d30..7b2acc9 100644
>> --- a/disas.c
>> +++ b/disas.c
>> @@ -64,6 +64,22 @@ generic_print_address (bfd_vma addr, struct disassemble_info *info)
>>      (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
>>  }
>>
>> +/* Print address in hex, truncated to the width of a target virtual address. */
>> +static void
>> +generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
>> +{
>> +    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
>> +    generic_print_address(addr & mask, info);
>> +}
>> +
>> +/* Print address in hex, truncated to the width of a host virtual address. */
>> +static void
>> +generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
>> +{
>> +    uint64_t mask = ~0ULL >> (64 - (sizeof(void *) * 8));
>> +    generic_print_address(addr & mask, info);
>> +}
>> +
>>  /* Just return the given address.  */
>>
>>  int
>> @@ -154,6 +170,7 @@ void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
>>      disasm_info.read_memory_func = target_read_memory;
>>      disasm_info.buffer_vma = code;
>>      disasm_info.buffer_length = size;
>> +    disasm_info.print_address_func = generic_print_target_address;
>>
>>  #ifdef TARGET_WORDS_BIGENDIAN
>>      disasm_info.endian = BFD_ENDIAN_BIG;
>> @@ -274,6 +291,7 @@ void disas(FILE *out, void *code, unsigned long size)
>>      int (*print_insn)(bfd_vma pc, disassemble_info *info);
>>
>>      INIT_DISASSEMBLE_INFO(disasm_info, out, fprintf);
>> +    disasm_info.print_address_func = generic_print_host_address;
>>
>>      disasm_info.buffer = code;
>>      disasm_info.buffer_vma = (uintptr_t)code;
>> @@ -386,6 +404,7 @@ void monitor_disas(Monitor *mon, CPUArchState *env,
>>      monitor_disas_env = env;
>>      monitor_disas_is_physical = is_physical;
>>      disasm_info.read_memory_func = monitor_read_memory;
>> +    disasm_info.print_address_func = generic_print_target_address;
>>
>>      disasm_info.buffer_vma = pc;
>>
>> --
>> 1.7.1
>>
>>
>
diff mbox

Patch

diff --git a/disas.c b/disas.c
index 93d8d30..7b2acc9 100644
--- a/disas.c
+++ b/disas.c
@@ -64,6 +64,22 @@  generic_print_address (bfd_vma addr, struct disassemble_info *info)
     (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
 }
 
+/* Print address in hex, truncated to the width of a target virtual address. */
+static void
+generic_print_target_address(bfd_vma addr, struct disassemble_info *info)
+{
+    uint64_t mask = ~0ULL >> (64 - TARGET_VIRT_ADDR_SPACE_BITS);
+    generic_print_address(addr & mask, info);
+}
+
+/* Print address in hex, truncated to the width of a host virtual address. */
+static void
+generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
+{
+    uint64_t mask = ~0ULL >> (64 - (sizeof(void *) * 8));
+    generic_print_address(addr & mask, info);
+}
+
 /* Just return the given address.  */
 
 int
@@ -154,6 +170,7 @@  void target_disas(FILE *out, target_ulong code, target_ulong size, int flags)
     disasm_info.read_memory_func = target_read_memory;
     disasm_info.buffer_vma = code;
     disasm_info.buffer_length = size;
+    disasm_info.print_address_func = generic_print_target_address;
 
 #ifdef TARGET_WORDS_BIGENDIAN
     disasm_info.endian = BFD_ENDIAN_BIG;
@@ -274,6 +291,7 @@  void disas(FILE *out, void *code, unsigned long size)
     int (*print_insn)(bfd_vma pc, disassemble_info *info);
 
     INIT_DISASSEMBLE_INFO(disasm_info, out, fprintf);
+    disasm_info.print_address_func = generic_print_host_address;
 
     disasm_info.buffer = code;
     disasm_info.buffer_vma = (uintptr_t)code;
@@ -386,6 +404,7 @@  void monitor_disas(Monitor *mon, CPUArchState *env,
     monitor_disas_env = env;
     monitor_disas_is_physical = is_physical;
     disasm_info.read_memory_func = monitor_read_memory;
+    disasm_info.print_address_func = generic_print_target_address;
 
     disasm_info.buffer_vma = pc;