diff mbox

ARM softmmu breakpoint misbehavior

Message ID 55DB560D.5060108@gmail.com
State New
Headers show

Commit Message

Sergey Fedorov Aug. 24, 2015, 5:36 p.m. UTC
Hi all,

There seems to be a bug in ARM breakpoint emulation. I am not sure how to
fix it and I would appreciate any suggestions. It is best illustrated by
a simple test which sets up and enables an unlinked address match
breakpoint but does not enable debug exceptions globally via the
MDSCR_EL1.MDE bit.

cat >test.s <<EOF               
    .text
    .global _start
_start:
    adr     x0, bp
    msr     dbgbvr0_el1, x0
    mov     x0, #1
    orr     x0, x0, #(0xf << 5)
    msr     dbgbcr0_el1, x0
bp:
    nop
    wfi
    b       .
EOF

aarch64-linux-gnu-as -o test.o test.s
aarch64-linux-gnu-ld -Ttext=0x40000000 -o test.elf test.o
qemu-system-aarch64 -nographic -machine virt -cpu cortex-a57 -kernel \
    test.elf -D qemu.log -d in_asm,exec -singlestep

First, it fails with a segmentation fault. What actually happens is that a
CPU breakpoint is inserted in hw_breakpoint_update(). After that, when
translating bp() an internal debug exception is generated in
gen_intermediate_code_internal_a64(), since there is a CPU breakpoint
which matches the address of the instruction being translated. Then
arm_debug_excp_handler() gets called in order to handle this breakpoint.
It calls check_breakpoints() and discovers there are no breakpoints
enabled, since MDSCR_EL1.MDE is not set. It simply returns and we
eventually get to cpu_handle_guest_debug(), then gdb_set_stop_cpu(),
which segfaults.

I managed to avoid this segmentation fault with this patch:

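diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 663c05d..223b939 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -889,6 +889,15 @@ void arm_debug_excp_handler(CPUState *cs)
             }
         }
     } else {
+        CPUBreakpoint *bp;
+        uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
+
+        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
+            if (bp->pc == pc && !(bp->flags & BP_CPU)) {
+                return;
+            }
+        }
+
         if (check_breakpoints(cpu)) {
             bool same_el = (arm_debug_target_el(env) ==
arm_current_el(env));
             if (extended_addresses_enabled(env)) {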
@@ -900,6 +909,8 @@ void arm_debug_excp_handler(CPUState *cs)
             raise_exception(env, EXCP_PREFETCH_ABORT,
                             syn_breakpoint(same_el),
                             arm_debug_target_el(env));
+        } else {
+            cpu_resume_from_signal(cs, NULL);
         }
     }
 }

The patch adds a check for non-CPU breakpoints first, then calls
cpu_resume_from_signal() if no CPU breakpoint matches.

With this patch QEMU hangs, generating an internal debug exception over
and over:

head -40 qemu.log
----------------
IN:
0x0000000040000000:  100000a0      adr x0, #+0x14 (addr 0x40000014)

Trace 0x7ff11e237000 [0000000040000000]
----------------
IN:
0x0000000040000004:  d5100080      msr (unknown), x0

Trace 0x7ff11e237040 [0000000040000004]
----------------
IN:
0x0000000040000008:  d2800020      mov x0, #0x1

Trace 0x7ff11e237080 [0000000040000008]
----------------
IN:
0x000000004000000c:  b27b0c00      orr x0, x0, #0x1e0

Trace 0x7ff11e2370c0 [000000004000000c]
----------------
IN:
0x0000000040000010:  d51000a0      msr (unknown), x0

Trace 0x7ff11e237110 [0000000040000010]
----------------
IN:
0x0000000040000014:  d503201f      nop
Disassembler disagrees with translator over instruction decoding
Please report this to qemu-devel@nongnu.org

Trace 0x7ff11e237150 [0000000040000014]
Trace 0x7ff11e237150 [0000000040000014]
Trace 0x7ff11e237150 [0000000040000014]
Trace 0x7ff11e237150 [0000000040000014]
Trace 0x7ff11e237150 [0000000040000014]
Trace 0x7ff11e237150 [0000000040000014]
Trace 0x7ff11e237150 [0000000040000014]
Trace 0x7ff11e237150 [0000000040000014]
Trace 0x7ff11e237150 [0000000040000014]

It looks like a bug, but I actually have no idea how best to
overcome this situation. I would be thankful for any suggestions :)

Best regards,
Sergey

Comments

Christopher Covington Aug. 25, 2015, 8:12 p.m. UTC | #1
On 08/24/2015 01:36 PM, Sergey Fedorov wrote:
> Hi all,
> 
> Seems there is a bug in ARM breakpoint emulation. I am not sure how to
> fix it and I would appreciate any suggestion. It is best illustrated by
> a simple test which sets up and enables an unlinked address match
> breakpoint but does not enable debug exceptions globally by
> MDSCR_EL1.MDE bit.
> 
> cat >test.s <<EOF               
>     .text
>     .global _start
> _start:
>     adr     x0, bp
>     msr     dbgbvr0_el1, x0
>     mov     x0, #1
>     orr     x0, x0, #(0xf << 5)
>     msr     dbgbcr0_el1, x0
> bp:
>     nop
>     wfi
>     b       .
> EOF
> 
> aarch64-linux-gnu-as -o test.o test.s
> aarch64-linux-gnu-ld -Ttext=0x40000000 -o test.elf test.o
> qemu-system-aarch64 -nographic -machine virt -cpu cortex-a57 -kernel
> test.elf -D qemu.log -d in_asm,exec -singlestep
> 
> First, it fails with segmentation fault. What actually happens is a CPU
> breakpoint is inserted in hw_breakpoint_update(). After that, when
> translating bp() an internal debug exception is generated in
> gen_intermediate_code_internal_a64() since there is a CPU breakpoint
> which matches the address of the instruction being translated. Then
> arm_debug_excp_handler() get called in order to handle this breakpoint.
> It calls check_breakpoints() and discovers there is no breakpoints
> enabled since MDSCR_EL1.MDE is not set. It simply returns and we
> eventually get to cpu_handle_guest_debug(), then gdb_set_stop_cpu()
> which does segmentation fault.
> 
> I managed to avoid this segmentation fault with this patch:
> 
> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
> index 663c05d..223b939 100644
> --- a/target-arm/op_helper.c
> +++ b/target-arm/op_helper.c
> @@ -889,6 +889,15 @@ void arm_debug_excp_handler(CPUState *cs)
>              }
>          }
>      } else {
> +        CPUBreakpoint *bp;
> +        uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
> +
> +        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
> +            if (bp->pc == pc && !(bp->flags & BP_CPU)) {
> +                return;
> +            }
> +        }
> +
>          if (check_breakpoints(cpu)) {
>              bool same_el = (arm_debug_target_el(env) ==
> arm_current_el(env));
>              if (extended_addresses_enabled(env)) {
> @@ -900,6 +909,8 @@ void arm_debug_excp_handler(CPUState *cs)
>              raise_exception(env, EXCP_PREFETCH_ABORT,
>                              syn_breakpoint(same_el),
>                              arm_debug_target_el(env));
> +        } else {
> +            cpu_resume_from_signal(cs, NULL);
>          }
>      }
>  }
> 
> The patch adds a check for non-CPU breakpoints first, then calls
> cpu_resume_from_signal() if no CPU breakpoint matches.
> 
> With this patch Qemu hangs generating internal debug exception over and
> over:
> 
> head -40 qemu.log
> ----------------
> IN:
> 0x0000000040000000:  100000a0      adr x0, #+0x14 (addr 0x40000014)
> 
> Trace 0x7ff11e237000 [0000000040000000]
> ----------------
> IN:
> 0x0000000040000004:  d5100080      msr (unknown), x0
> 
> Trace 0x7ff11e237040 [0000000040000004]
> ----------------
> IN:
> 0x0000000040000008:  d2800020      mov x0, #0x1
> 
> Trace 0x7ff11e237080 [0000000040000008]
> ----------------
> IN:
> 0x000000004000000c:  b27b0c00      orr x0, x0, #0x1e0
> 
> Trace 0x7ff11e2370c0 [000000004000000c]
> ----------------
> IN:
> 0x0000000040000010:  d51000a0      msr (unknown), x0
> 
> Trace 0x7ff11e237110 [0000000040000010]
> ----------------
> IN:
> 0x0000000040000014:  d503201f      nop
> Disassembler disagrees with translator over instruction decoding
> Please report this to qemu-devel@nongnu.org
> 
> Trace 0x7ff11e237150 [0000000040000014]
> Trace 0x7ff11e237150 [0000000040000014]
> Trace 0x7ff11e237150 [0000000040000014]
> Trace 0x7ff11e237150 [0000000040000014]
> Trace 0x7ff11e237150 [0000000040000014]
> Trace 0x7ff11e237150 [0000000040000014]
> Trace 0x7ff11e237150 [0000000040000014]
> Trace 0x7ff11e237150 [0000000040000014]
> Trace 0x7ff11e237150 [0000000040000014]
> 
> It looks like a bug, but I actually have no idea how would be best to
> overcome this situation. I would be thankful for any suggestion :)

Sorry that I don't have anything more useful to say than the following, but
adding -d int to the mix might help illustrate the alleged internal debug
exception in the trace. Peter recently posted a series related to semihosting
(target-arm: Implement A64 semihosting) that I think touches some of this
code, if you haven't seen that already.

Christopher Covington
Peter Maydell Aug. 28, 2015, 7:21 p.m. UTC | #2
On 24 August 2015 at 18:36, Sergey Fedorov <serge.fdrv@gmail.com> wrote:
> Hi all,
>
> Seems there is a bug in ARM breakpoint emulation. I am not sure how to
> fix it and I would appreciate any suggestion. It is best illustrated by
> a simple test which sets up and enables an unlinked address match
> breakpoint but does not enable debug exceptions globally by
> MDSCR_EL1.MDE bit.

Thanks for the test case, that's very useful.

> I managed to avoid this segmentation fault with this patch:
>
> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
> index 663c05d..223b939 100644
> --- a/target-arm/op_helper.c
> +++ b/target-arm/op_helper.c
> @@ -889,6 +889,15 @@ void arm_debug_excp_handler(CPUState *cs)
>              }
>          }
>      } else {
> +        CPUBreakpoint *bp;
> +        uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
> +
> +        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
> +            if (bp->pc == pc && !(bp->flags & BP_CPU)) {
> +                return;
> +            }
> +        }
> +
>          if (check_breakpoints(cpu)) {
>              bool same_el = (arm_debug_target_el(env) ==
> arm_current_el(env));
>              if (extended_addresses_enabled(env)) {
> @@ -900,6 +909,8 @@ void arm_debug_excp_handler(CPUState *cs)
>              raise_exception(env, EXCP_PREFETCH_ABORT,
>                              syn_breakpoint(same_el),
>                              arm_debug_target_el(env));
> +        } else {
> +            cpu_resume_from_signal(cs, NULL);
>          }
>      }
>  }
>
> The patch adds a check for non-CPU breakpoints first, then calls
> cpu_resume_from_signal() if no CPU breakpoint matches.

This approach won't work (as you've noticed :-)), because
it's trying to say "ignore the bp hit and continue execution",
but when we translated code for this TB all we generated was
a TB that says "generate debug exception". We never generated
a TB with the real code for the instruction in it, so trying
to resume will just cause us to raise the debug exception
again.

I think what we need to do is have the translate-a64.c
code be smarter, and actually generate the real code
if we're not going to really hit the bp. Except that we
don't really have all the info in the flags to know for
sure about that. So we probably need to do something like
generating a call to a helper which checks whether this
bp should hit and doesn't throw the exception unless it
has to, with the actual code for the insn following.
I need to think about how this ought to work...

The watchpoint code has a chance of cpu_resume_from_signal
doing the right thing, because we really did have the
code to do the load/store. However I have a feeling this
won't interact properly with the fact that ARM needs
BP_STOP_BEFORE_ACCESS on its watchpoints (unlike x86, which
is where I was looking at when I wrote the ARM wp handling
code.) So we may well be broken there as well in the
case where check_watchpoints() returns false.

thanks
-- PMM
Sergey Fedorov Sept. 1, 2015, 11:58 a.m. UTC | #3
28.08.2015 22:21, Peter Maydell wrote:
> I think what we need to do is have the translate-a64.c
> code be smarter, and actually generate the real code
> if we're not going to really hit the bp. Except that we
> don't really have all the info in the flags to know for
> sure about that. So we probably need to do something like
> generating a call to a helper which checks whether this
> bp should hit and doesn't throw the exception unless it
> has to, with the actual code for the insn following.
> I need to think about how this ought to work...

I think we should only generate a CPU breakpoint TCG exception when it
is really going to become a CPU exception (or a GDB exception). This could
be done by moving the logic of check_breakpoints() into a helper called
from the TB.

> The watchpoint code has a chance of cpu_resume_from_signal
> doing the right thing, because we really did have the
> code to do the load/store. However I have a feeling this
> won't interact properly with the fact that ARM needs
> BP_STOP_BEFORE_ACCESS on its watchpoints (unlike x86, which
> is where I was looking at when I wrote the ARM wp handling
> code.) So we may well be broken there as well in the
> case where check_watchpoints() returns false.

As for watchpoints, I'm going to check that a bit later as well.
Sergey Fedorov Sept. 2, 2015, 4:53 p.m. UTC | #4
On 28.08.2015 22:21, Peter Maydell wrote:
> The watchpoint code has a chance of cpu_resume_from_signal
> doing the right thing, because we really did have the
> code to do the load/store. However I have a feeling this
> won't interact properly with the fact that ARM needs
> BP_STOP_BEFORE_ACCESS on its watchpoints (unlike x86, which
> is where I was looking at when I wrote the ARM wp handling
> code.) So we may well be broken there as well in the
> case where check_watchpoints() returns false.

You are right. The same problem exists with watchpoints. Here is a small
test for this:

    .text
    .global _start
_start:
    adr     x0, wp
    msr     dbgwvr0_el1, x0
    mov     x0, #1
    orr     x0, x0, #(3 << 3)
    orr     x0, x0, #(0xff << 5)
    msr     dbgwcr0_el1, x0
    ldr     x0, wp
    wfi
    b       .

    .data
    .balign 64
wp:
    .quad   0
diff mbox

Patch

diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 663c05d..223b939 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -889,6 +889,15 @@  void arm_debug_excp_handler(CPUState *cs)
             }
         }
     } else {
+        CPUBreakpoint *bp;
+        uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
+
+        QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
+            if (bp->pc == pc && !(bp->flags & BP_CPU)) {
+                return;
+            }
+        }
+
         if (check_breakpoints(cpu)) {
             bool same_el = (arm_debug_target_el(env) ==
arm_current_el(env));