diff mbox

[1/3] tcg/arm: fix branch target change during code retranslation

Message ID 1294350874-6885-1-git-send-email-aurelien@aurel32.net
State New
Headers show

Commit Message

Aurelien Jarno Jan. 6, 2011, 9:54 p.m. UTC
QEMU uses code retranslation to restore the CPU state when an exception
happens. For it to work the retranslation must not modify the generated
code. This is what is currently implemented in ARM TCG.

However on CPU that don't have icache/dcache/memory synchronised like
ARM, this requirement is stronger and code retranslation must not modify
the generated code "atomically", as the cache line might be flushed
at any moment (interrupt, exception, task switching), even if not
triggered by QEMU. The probability for this to happen is very low, and
depends on cache size and associativiy, machine load, interrupts, so the
symptoms are might happen randomly.

This requirement is currently not followed in tcg/arm, for the
load/store code, which basically has the following structure:
  1) tlb access code is written
  2) conditional fast path code is written
  3) branch is written with a temporary target
  4) slow path code is written
  5) branch target is updated
The cache lines corresponding to the retranslated code is not flushed
after code retranslation as the generated code is supposed to be the
same. However if the cache line corresponding to the branch instruction
is flushed between step 3 and 5, and is not flushed again before the
code is executed again, the branch target is wrong. In the guest, the
symptoms are MMU page fault at a random addresses, which leads to
kernel page fault or segmentation faults.

The patch fixes this issue by avoiding writing the branch target until
it is known, that is by writing only the branch instruction first, and
later only the offset.

This fixes booting linux guests on ARM hosts (tested: arm, i386, mips,
mipsel, sh4, sparc).

Cc: Andrzej Zaborowski <balrog@zabor.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/arm/tcg-target.c |   28 ++++++++++++++++++++--------
 1 files changed, 20 insertions(+), 8 deletions(-)

Comments

Edgar E. Iglesias Jan. 7, 2011, 9:12 a.m. UTC | #1
On Thu, Jan 06, 2011 at 10:54:32PM +0100, Aurelien Jarno wrote:
> QEMU uses code retranslation to restore the CPU state when an exception
> happens. For it to work the retranslation must not modify the generated
> code. This is what is currently implemented in ARM TCG.
> 
> However on CPU that don't have icache/dcache/memory synchronised like
> ARM, this requirement is stronger and code retranslation must not modify
> the generated code "atomically", as the cache line might be flushed
> at any moment (interrupt, exception, task switching), even if not
> triggered by QEMU. The probability for this to happen is very low, and
> depends on cache size and associativiy, machine load, interrupts, so the
> symptoms are might happen randomly.
> 
> This requirement is currently not followed in tcg/arm, for the
> load/store code, which basically has the following structure:
>   1) tlb access code is written
>   2) conditional fast path code is written
>   3) branch is written with a temporary target
>   4) slow path code is written
>   5) branch target is updated
> The cache lines corresponding to the retranslated code is not flushed
> after code retranslation as the generated code is supposed to be the
> same. However if the cache line corresponding to the branch instruction
> is flushed between step 3 and 5, and is not flushed again before the
> code is executed again, the branch target is wrong. In the guest, the
> symptoms are MMU page fault at a random addresses, which leads to
> kernel page fault or segmentation faults.
> 
> The patch fixes this issue by avoiding writing the branch target until
> it is known, that is by writing only the branch instruction first, and
> later only the offset.
> 
> This fixes booting linux guests on ARM hosts (tested: arm, i386, mips,
> mipsel, sh4, sparc).
> 
> Cc: Andrzej Zaborowski <balrog@zabor.org>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Good catch!

The patch looks good to me.

Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com>


> ---
>  tcg/arm/tcg-target.c |   28 ++++++++++++++++++++--------
>  1 files changed, 20 insertions(+), 8 deletions(-)
> 
> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
> index a3af5b2..9def2e5 100644
> --- a/tcg/arm/tcg-target.c
> +++ b/tcg/arm/tcg-target.c
> @@ -113,12 +113,25 @@ static const int tcg_target_call_oarg_regs[2] = {
>      TCG_REG_R0, TCG_REG_R1
>  };
>  
> +static inline void reloc_abs32(void *code_ptr, tcg_target_long target)
> +{
> +    *(uint32_t *) code_ptr = target;
> +}
> +
> +static inline void reloc_pc24(void *code_ptr, tcg_target_long target)
> +{
> +    uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2);
> +
> +    *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff)
> +                             | (offset & 0xffffff);
> +}
> +
>  static void patch_reloc(uint8_t *code_ptr, int type,
>                  tcg_target_long value, tcg_target_long addend)
>  {
>      switch (type) {
>      case R_ARM_ABS32:
> -        *(uint32_t *) code_ptr = value;
> +        reloc_abs32(code_ptr, value);
>          break;
>  
>      case R_ARM_CALL:
> @@ -127,8 +140,7 @@ static void patch_reloc(uint8_t *code_ptr, int type,
>          tcg_abort();
>  
>      case R_ARM_PC24:
> -        *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & 0xff000000) |
> -                (((value - ((tcg_target_long) code_ptr + 8)) >> 2) & 0xffffff);
> +        reloc_pc24(code_ptr, value);
>          break;
>      }
>  }
> @@ -1031,7 +1043,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>      }
>  
>      label_ptr = (void *) s->code_ptr;
> -    tcg_out_b(s, COND_EQ, 8);
> +    tcg_out_b_noaddr(s, COND_EQ);
>  
>      /* TODO: move this code to where the constants pool will be */
>      if (addr_reg != TCG_REG_R0) {
> @@ -1076,7 +1088,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>          break;
>      }
>  
> -    *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2;
> +    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
>  #else /* !CONFIG_SOFTMMU */
>      if (GUEST_BASE) {
>          uint32_t offset = GUEST_BASE;
> @@ -1245,7 +1257,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>      }
>  
>      label_ptr = (void *) s->code_ptr;
> -    tcg_out_b(s, COND_EQ, 8);
> +    tcg_out_b_noaddr(s, COND_EQ);
>  
>      /* TODO: move this code to where the constants pool will be */
>      tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
> @@ -1317,7 +1329,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>      if (opc == 3)
>          tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
>  
> -    *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2;
> +    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
>  #else /* !CONFIG_SOFTMMU */
>      if (GUEST_BASE) {
>          uint32_t offset = GUEST_BASE;
> @@ -1399,7 +1411,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>              /* Direct jump method */
>  #if defined(USE_DIRECT_JUMP)
>              s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
> -            tcg_out_b(s, COND_AL, 8);
> +            tcg_out_b_noaddr(s, COND_AL);
>  #else
>              tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
>              s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
> -- 
> 1.7.2.3
> 
>
diff mbox

Patch

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index a3af5b2..9def2e5 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -113,12 +113,25 @@  static const int tcg_target_call_oarg_regs[2] = {
     TCG_REG_R0, TCG_REG_R1
 };
 
+static inline void reloc_abs32(void *code_ptr, tcg_target_long target)
+{
+    *(uint32_t *) code_ptr = target;
+}
+
+static inline void reloc_pc24(void *code_ptr, tcg_target_long target)
+{
+    uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2);
+
+    *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff)
+                             | (offset & 0xffffff);
+}
+
 static void patch_reloc(uint8_t *code_ptr, int type,
                 tcg_target_long value, tcg_target_long addend)
 {
     switch (type) {
     case R_ARM_ABS32:
-        *(uint32_t *) code_ptr = value;
+        reloc_abs32(code_ptr, value);
         break;
 
     case R_ARM_CALL:
@@ -127,8 +140,7 @@  static void patch_reloc(uint8_t *code_ptr, int type,
         tcg_abort();
 
     case R_ARM_PC24:
-        *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & 0xff000000) |
-                (((value - ((tcg_target_long) code_ptr + 8)) >> 2) & 0xffffff);
+        reloc_pc24(code_ptr, value);
         break;
     }
 }
@@ -1031,7 +1043,7 @@  static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
 
     label_ptr = (void *) s->code_ptr;
-    tcg_out_b(s, COND_EQ, 8);
+    tcg_out_b_noaddr(s, COND_EQ);
 
     /* TODO: move this code to where the constants pool will be */
     if (addr_reg != TCG_REG_R0) {
@@ -1076,7 +1088,7 @@  static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
         break;
     }
 
-    *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2;
+    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         uint32_t offset = GUEST_BASE;
@@ -1245,7 +1257,7 @@  static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     }
 
     label_ptr = (void *) s->code_ptr;
-    tcg_out_b(s, COND_EQ, 8);
+    tcg_out_b_noaddr(s, COND_EQ);
 
     /* TODO: move this code to where the constants pool will be */
     tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
@@ -1317,7 +1329,7 @@  static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     if (opc == 3)
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
 
-    *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2;
+    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         uint32_t offset = GUEST_BASE;
@@ -1399,7 +1411,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             /* Direct jump method */
 #if defined(USE_DIRECT_JUMP)
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            tcg_out_b(s, COND_AL, 8);
+            tcg_out_b_noaddr(s, COND_AL);
 #else
             tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;