diff mbox series

tcg/riscv: Fix base register for user-only qemu_ld/st

Message ID 20221023233337.2846860-1-richard.henderson@linaro.org
State New
Headers show
Series tcg/riscv: Fix base register for user-only qemu_ld/st | expand

Commit Message

Richard Henderson Oct. 23, 2022, 11:33 p.m. UTC
When guest_base != 0, we were not coordinating the usage of
TCG_REG_TMP0 as base properly, leading to a previous zero-extend
of the input address being discarded.

Shuffle the alignment check to the front, because that does not
depend on the zero-extend, and it keeps the register usage clear.
Set base after each step of the address arithmetic instead of before.

Return the base register used from tcg_out_tlb_load, so as to
keep that register choice localized to that function.

Reported-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/riscv/tcg-target.c.inc | 39 +++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

Comments

LIU Zhiwei Oct. 25, 2022, 6:09 a.m. UTC | #1
On 2022/10/24 7:33, Richard Henderson wrote:
> When guest_base != 0, we were not coordinating the usage of
> TCG_REG_TMP0 as base properly, leading to a previous zero-extend
> of the input address being discarded.
>
> Shuffle the alignment check to the front, because that does not
> depend on the zero-extend, and it keeps the register usage clear.
> Set base after each step of the address arithmetic instead of before.
>
> Return the base register used from tcg_out_tlb_load, so as to
> keep that register choice localized to that function.
>
> Reported-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>

Thanks,
Zhiwei

> ---
>   tcg/riscv/tcg-target.c.inc | 39 +++++++++++++++++++++-----------------
>   1 file changed, 22 insertions(+), 17 deletions(-)
>
> diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
> index 2a84c57bec..e3b608034f 100644
> --- a/tcg/riscv/tcg-target.c.inc
> +++ b/tcg/riscv/tcg-target.c.inc
> @@ -923,9 +923,9 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
>       tcg_debug_assert(ok);
>   }
>   
> -static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
> -                             TCGReg addrh, MemOpIdx oi,
> -                             tcg_insn_unit **label_ptr, bool is_load)
> +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
> +                               TCGReg addrh, MemOpIdx oi,
> +                               tcg_insn_unit **label_ptr, bool is_load)
>   {
>       MemOp opc = get_memop(oi);
>       unsigned s_bits = opc & MO_SIZE;
> @@ -975,6 +975,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
>           addrl = TCG_REG_TMP0;
>       }
>       tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
> +    return TCG_REG_TMP0;
>   }
>   
>   static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
> @@ -1177,7 +1178,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
>   #else
>       unsigned a_bits;
>   #endif
> -    TCGReg base = TCG_REG_TMP0;
> +    TCGReg base;
>   
>       data_regl = *args++;
>       data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
> @@ -1187,23 +1188,25 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
>       opc = get_memop(oi);
>   
>   #if defined(CONFIG_SOFTMMU)
> -    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
> +    base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
>       tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
>       add_qemu_ldst_label(s, 1, oi,
>                           (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                           data_regl, data_regh, addr_regl, addr_regh,
>                           s->code_ptr, label_ptr);
>   #else
> -    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> -        tcg_out_ext32u(s, base, addr_regl);
> -        addr_regl = base;
> -    }
>       a_bits = get_alignment_bits(opc);
>       if (a_bits) {
>           tcg_out_test_alignment(s, true, addr_regl, a_bits);
>       }
> +    base = addr_regl;
> +    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> +        tcg_out_ext32u(s, TCG_REG_TMP0, base);
> +        base = TCG_REG_TMP0;
> +    }
>       if (guest_base != 0) {
> -        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
> +        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
> +        base = TCG_REG_TMP0;
>       }
>       tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
>   #endif
> @@ -1249,7 +1252,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
>   #else
>       unsigned a_bits;
>   #endif
> -    TCGReg base = TCG_REG_TMP0;
> +    TCGReg base;
>   
>       data_regl = *args++;
>       data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
> @@ -1259,23 +1262,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
>       opc = get_memop(oi);
>   
>   #if defined(CONFIG_SOFTMMU)
> -    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
> +    base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
>       tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
>       add_qemu_ldst_label(s, 0, oi,
>                           (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                           data_regl, data_regh, addr_regl, addr_regh,
>                           s->code_ptr, label_ptr);
>   #else
> -    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> -        tcg_out_ext32u(s, base, addr_regl);
> -        addr_regl = base;
> -    }
>       a_bits = get_alignment_bits(opc);
>       if (a_bits) {
>           tcg_out_test_alignment(s, false, addr_regl, a_bits);
>       }
> +    base = addr_regl;
> +    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> +        tcg_out_ext32u(s, TCG_REG_TMP0, base);
> +        base = TCG_REG_TMP0;
> +    }
>       if (guest_base != 0) {
> -        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
> +        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
> +        base = TCG_REG_TMP0;
>       }
>       tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
>   #endif
Alistair Francis Oct. 25, 2022, 10:45 p.m. UTC | #2
On Mon, Oct 24, 2022 at 1:26 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> When guest_base != 0, we were not coordinating the usage of
> TCG_REG_TMP0 as base properly, leading to a previous zero-extend
> of the input address being discarded.
>
> Shuffle the alignment check to the front, because that does not
> depend on the zero-extend, and it keeps the register usage clear.
> Set base after each step of the address arithmetic instead of before.
>
> Return the base register used from tcg_out_tlb_load, so as to
> keep that register choice localized to that function.
>
> Reported-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  tcg/riscv/tcg-target.c.inc | 39 +++++++++++++++++++++-----------------
>  1 file changed, 22 insertions(+), 17 deletions(-)
>
> diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
> index 2a84c57bec..e3b608034f 100644
> --- a/tcg/riscv/tcg-target.c.inc
> +++ b/tcg/riscv/tcg-target.c.inc
> @@ -923,9 +923,9 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
>      tcg_debug_assert(ok);
>  }
>
> -static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
> -                             TCGReg addrh, MemOpIdx oi,
> -                             tcg_insn_unit **label_ptr, bool is_load)
> +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
> +                               TCGReg addrh, MemOpIdx oi,
> +                               tcg_insn_unit **label_ptr, bool is_load)
>  {
>      MemOp opc = get_memop(oi);
>      unsigned s_bits = opc & MO_SIZE;
> @@ -975,6 +975,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
>          addrl = TCG_REG_TMP0;
>      }
>      tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
> +    return TCG_REG_TMP0;
>  }
>
>  static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
> @@ -1177,7 +1178,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
>  #else
>      unsigned a_bits;
>  #endif
> -    TCGReg base = TCG_REG_TMP0;
> +    TCGReg base;
>
>      data_regl = *args++;
>      data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
> @@ -1187,23 +1188,25 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
>      opc = get_memop(oi);
>
>  #if defined(CONFIG_SOFTMMU)
> -    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
> +    base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
>      tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
>      add_qemu_ldst_label(s, 1, oi,
>                          (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                          data_regl, data_regh, addr_regl, addr_regh,
>                          s->code_ptr, label_ptr);
>  #else
> -    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> -        tcg_out_ext32u(s, base, addr_regl);
> -        addr_regl = base;
> -    }
>      a_bits = get_alignment_bits(opc);
>      if (a_bits) {
>          tcg_out_test_alignment(s, true, addr_regl, a_bits);
>      }
> +    base = addr_regl;
> +    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> +        tcg_out_ext32u(s, TCG_REG_TMP0, base);
> +        base = TCG_REG_TMP0;
> +    }
>      if (guest_base != 0) {
> -        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
> +        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
> +        base = TCG_REG_TMP0;
>      }
>      tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
>  #endif
> @@ -1249,7 +1252,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
>  #else
>      unsigned a_bits;
>  #endif
> -    TCGReg base = TCG_REG_TMP0;
> +    TCGReg base;
>
>      data_regl = *args++;
>      data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
> @@ -1259,23 +1262,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
>      opc = get_memop(oi);
>
>  #if defined(CONFIG_SOFTMMU)
> -    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
> +    base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
>      tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
>      add_qemu_ldst_label(s, 0, oi,
>                          (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                          data_regl, data_regh, addr_regl, addr_regh,
>                          s->code_ptr, label_ptr);
>  #else
> -    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> -        tcg_out_ext32u(s, base, addr_regl);
> -        addr_regl = base;
> -    }
>      a_bits = get_alignment_bits(opc);
>      if (a_bits) {
>          tcg_out_test_alignment(s, false, addr_regl, a_bits);
>      }
> +    base = addr_regl;
> +    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> +        tcg_out_ext32u(s, TCG_REG_TMP0, base);
> +        base = TCG_REG_TMP0;
> +    }
>      if (guest_base != 0) {
> -        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
> +        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
> +        base = TCG_REG_TMP0;
>      }
>      tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
>  #endif
> --
> 2.34.1
>
>
Alistair Francis Oct. 25, 2022, 11:34 p.m. UTC | #3
On Mon, Oct 24, 2022 at 1:26 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> When guest_base != 0, we were not coordinating the usage of
> TCG_REG_TMP0 as base properly, leading to a previous zero-extend
> of the input address being discarded.
>
> Shuffle the alignment check to the front, because that does not
> depend on the zero-extend, and it keeps the register usage clear.
> Set base after each step of the address arithmetic instead of before.
>
> Return the base register used from tcg_out_tlb_load, so as to
> keep that register choice localized to that function.
>
> Reported-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  tcg/riscv/tcg-target.c.inc | 39 +++++++++++++++++++++-----------------
>  1 file changed, 22 insertions(+), 17 deletions(-)
>
> diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
> index 2a84c57bec..e3b608034f 100644
> --- a/tcg/riscv/tcg-target.c.inc
> +++ b/tcg/riscv/tcg-target.c.inc
> @@ -923,9 +923,9 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
>      tcg_debug_assert(ok);
>  }
>
> -static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
> -                             TCGReg addrh, MemOpIdx oi,
> -                             tcg_insn_unit **label_ptr, bool is_load)
> +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
> +                               TCGReg addrh, MemOpIdx oi,
> +                               tcg_insn_unit **label_ptr, bool is_load)
>  {
>      MemOp opc = get_memop(oi);
>      unsigned s_bits = opc & MO_SIZE;
> @@ -975,6 +975,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
>          addrl = TCG_REG_TMP0;
>      }
>      tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
> +    return TCG_REG_TMP0;
>  }
>
>  static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
> @@ -1177,7 +1178,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
>  #else
>      unsigned a_bits;
>  #endif
> -    TCGReg base = TCG_REG_TMP0;
> +    TCGReg base;
>
>      data_regl = *args++;
>      data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
> @@ -1187,23 +1188,25 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
>      opc = get_memop(oi);
>
>  #if defined(CONFIG_SOFTMMU)
> -    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
> +    base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
>      tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
>      add_qemu_ldst_label(s, 1, oi,
>                          (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                          data_regl, data_regh, addr_regl, addr_regh,
>                          s->code_ptr, label_ptr);
>  #else
> -    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> -        tcg_out_ext32u(s, base, addr_regl);
> -        addr_regl = base;
> -    }
>      a_bits = get_alignment_bits(opc);
>      if (a_bits) {
>          tcg_out_test_alignment(s, true, addr_regl, a_bits);
>      }
> +    base = addr_regl;
> +    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> +        tcg_out_ext32u(s, TCG_REG_TMP0, base);
> +        base = TCG_REG_TMP0;
> +    }
>      if (guest_base != 0) {
> -        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
> +        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
> +        base = TCG_REG_TMP0;
>      }
>      tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
>  #endif
> @@ -1249,7 +1252,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
>  #else
>      unsigned a_bits;
>  #endif
> -    TCGReg base = TCG_REG_TMP0;
> +    TCGReg base;
>
>      data_regl = *args++;
>      data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
> @@ -1259,23 +1262,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
>      opc = get_memop(oi);
>
>  #if defined(CONFIG_SOFTMMU)
> -    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
> +    base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
>      tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
>      add_qemu_ldst_label(s, 0, oi,
>                          (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                          data_regl, data_regh, addr_regl, addr_regh,
>                          s->code_ptr, label_ptr);
>  #else
> -    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> -        tcg_out_ext32u(s, base, addr_regl);
> -        addr_regl = base;
> -    }
>      a_bits = get_alignment_bits(opc);
>      if (a_bits) {
>          tcg_out_test_alignment(s, false, addr_regl, a_bits);
>      }
> +    base = addr_regl;
> +    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
> +        tcg_out_ext32u(s, TCG_REG_TMP0, base);
> +        base = TCG_REG_TMP0;
> +    }
>      if (guest_base != 0) {
> -        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
> +        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
> +        base = TCG_REG_TMP0;
>      }
>      tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
>  #endif
> --
> 2.34.1
>
>
diff mbox series

Patch

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 2a84c57bec..e3b608034f 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -923,9 +923,9 @@  static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
     tcg_debug_assert(ok);
 }
 
-static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
-                             TCGReg addrh, MemOpIdx oi,
-                             tcg_insn_unit **label_ptr, bool is_load)
+static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
+                               TCGReg addrh, MemOpIdx oi,
+                               tcg_insn_unit **label_ptr, bool is_load)
 {
     MemOp opc = get_memop(oi);
     unsigned s_bits = opc & MO_SIZE;
@@ -975,6 +975,7 @@  static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
         addrl = TCG_REG_TMP0;
     }
     tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
+    return TCG_REG_TMP0;
 }
 
 static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
@@ -1177,7 +1178,7 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 #else
     unsigned a_bits;
 #endif
-    TCGReg base = TCG_REG_TMP0;
+    TCGReg base;
 
     data_regl = *args++;
     data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
@@ -1187,23 +1188,25 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
     opc = get_memop(oi);
 
 #if defined(CONFIG_SOFTMMU)
-    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
+    base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
     tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
     add_qemu_ldst_label(s, 1, oi,
                         (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                         data_regl, data_regh, addr_regl, addr_regh,
                         s->code_ptr, label_ptr);
 #else
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-        tcg_out_ext32u(s, base, addr_regl);
-        addr_regl = base;
-    }
     a_bits = get_alignment_bits(opc);
     if (a_bits) {
         tcg_out_test_alignment(s, true, addr_regl, a_bits);
     }
+    base = addr_regl;
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, TCG_REG_TMP0, base);
+        base = TCG_REG_TMP0;
+    }
     if (guest_base != 0) {
-        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
+        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
+        base = TCG_REG_TMP0;
     }
     tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
 #endif
@@ -1249,7 +1252,7 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 #else
     unsigned a_bits;
 #endif
-    TCGReg base = TCG_REG_TMP0;
+    TCGReg base;
 
     data_regl = *args++;
     data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
@@ -1259,23 +1262,25 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     opc = get_memop(oi);
 
 #if defined(CONFIG_SOFTMMU)
-    tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
+    base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
     tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
     add_qemu_ldst_label(s, 0, oi,
                         (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                         data_regl, data_regh, addr_regl, addr_regh,
                         s->code_ptr, label_ptr);
 #else
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-        tcg_out_ext32u(s, base, addr_regl);
-        addr_regl = base;
-    }
     a_bits = get_alignment_bits(opc);
     if (a_bits) {
         tcg_out_test_alignment(s, false, addr_regl, a_bits);
     }
+    base = addr_regl;
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, TCG_REG_TMP0, base);
+        base = TCG_REG_TMP0;
+    }
     if (guest_base != 0) {
-        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
+        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
+        base = TCG_REG_TMP0;
     }
     tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
 #endif