diff mbox

[v2,6/7] tcg: Introduce zero and sign-extended versions of load helpers

Message ID 1377813961-12208-7-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Aug. 29, 2013, 10:06 p.m. UTC
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/exec/softmmu_template.h | 58 ++++++++++++++++++++++++++++++++---------
 tcg/i386/tcg-target.c           |  6 ++---
 tcg/tcg.h                       | 21 ++++++++++-----
 3 files changed, 64 insertions(+), 21 deletions(-)

Comments

Aurelien Jarno Aug. 30, 2013, 4:55 p.m. UTC | #1
On Thu, Aug 29, 2013 at 03:06:00PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  include/exec/softmmu_template.h | 58 ++++++++++++++++++++++++++++++++---------
>  tcg/i386/tcg-target.c           |  6 ++---
>  tcg/tcg.h                       | 21 ++++++++++-----
>  3 files changed, 64 insertions(+), 21 deletions(-)
> 
> diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
> index f9922e2..5bbc56a 100644
> --- a/include/exec/softmmu_template.h
> +++ b/include/exec/softmmu_template.h
> @@ -29,23 +29,39 @@
>  #if DATA_SIZE == 8
>  #define SUFFIX q
>  #define LSUFFIX q
> -#define DATA_TYPE uint64_t
> +#define SDATA_TYPE  int64_t
>  #elif DATA_SIZE == 4
>  #define SUFFIX l
>  #define LSUFFIX l
> -#define DATA_TYPE uint32_t
> +#define SDATA_TYPE  int32_t
>  #elif DATA_SIZE == 2
>  #define SUFFIX w
>  #define LSUFFIX uw
> -#define DATA_TYPE uint16_t
> +#define SDATA_TYPE  int16_t
>  #elif DATA_SIZE == 1
>  #define SUFFIX b
>  #define LSUFFIX ub
> -#define DATA_TYPE uint8_t
> +#define SDATA_TYPE  int8_t
>  #else
>  #error unsupported data size
>  #endif
>  
> +#define DATA_TYPE   glue(u, SDATA_TYPE)
> +
> +/* For the benefit of TCG generated code, we want to avoid the complication
> +   of ABI-specific return type promotion and always return a value extended
> +   to the register size of the host.  This is tcg_target_long, except in the
> +   case of a 32-bit host and 64-bit data, and for that we always have
> +   uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
> +#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
> +# define WORD_TYPE  DATA_TYPE
> +# define USUFFIX    SUFFIX
> +#else
> +# define WORD_TYPE  tcg_target_ulong
> +# define USUFFIX    glue(u, SUFFIX)
> +# define SSUFFIX    glue(s, SUFFIX)
> +#endif
> +
>  #ifdef SOFTMMU_CODE_ACCESS
>  #define READ_ACCESS_TYPE 2
>  #define ADDR_READ addr_code
> @@ -77,10 +93,10 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
>  #ifdef SOFTMMU_CODE_ACCESS
>  static
>  #endif
> -DATA_TYPE
> -glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
> -                                             target_ulong addr, int mmu_idx,
> -                                             uintptr_t retaddr)
> +WORD_TYPE
> +glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env,
> +                                              target_ulong addr, int mmu_idx,
> +                                              uintptr_t retaddr)
>  {
>      int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
>      target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
> @@ -126,9 +142,9 @@ glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>          addr2 = addr1 + DATA_SIZE;
>          /* Note the adjustment at the beginning of the function.
>             Undo that for the recursion.  */
> -        res1 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)
> +        res1 = glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
>              (env, addr1, mmu_idx, retaddr + GETPC_ADJ);
> -        res2 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)
> +        res2 = glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
>              (env, addr2, mmu_idx, retaddr + GETPC_ADJ);
>          shift = (addr & (DATA_SIZE - 1)) * 8;
>  #ifdef TARGET_WORDS_BIGENDIAN
> @@ -147,19 +163,33 @@ glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>  #endif
>  
>      haddr = addr + env->tlb_table[mmu_idx][index].addend;
> -    return glue(glue(ld, LSUFFIX), _raw)((uint8_t *)haddr);
> +    /* Note that ldl_raw is defined with type "int".  */
> +    return (DATA_TYPE) glue(glue(ld, LSUFFIX), _raw)((uint8_t *)haddr);
>  }
>  
>  DATA_TYPE
>  glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
>                                           int mmu_idx)
>  {
> -    return glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
> +    return glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
>                                                          GETRA_EXT());
>  }
>  
>  #ifndef SOFTMMU_CODE_ACCESS
>  
> +/* Provide signed versions of the load routines as well.  We can of course
> +   avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
> +#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
> +WORD_TYPE
> +glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)(CPUArchState *env,
> +                                              target_ulong addr, int mmu_idx,
> +                                              uintptr_t retaddr)
> +{
> +    return (SDATA_TYPE) glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
> +        (env, addr, mmu_idx, retaddr);
> +}
> +#endif
> +
>  static inline void glue(io_write, SUFFIX)(CPUArchState *env,
>                                            hwaddr physaddr,
>                                            DATA_TYPE val,
> @@ -267,3 +297,7 @@ glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
>  #undef LSUFFIX
>  #undef DATA_SIZE
>  #undef ADDR_READ
> +#undef WORD_TYPE
> +#undef SDATA_TYPE
> +#undef USUFFIX
> +#undef SSUFFIX
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 4c98cc4..5aee0fa 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1026,9 +1026,9 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
>   *                                     int mmu_idx, uintptr_t ra)
>   */
>  static const void * const qemu_ld_helpers[4] = {
> -    helper_ret_ldb_mmu,
> -    helper_ret_ldw_mmu,
> -    helper_ret_ldl_mmu,
> +    helper_ret_ldub_mmu,
> +    helper_ret_lduw_mmu,
> +    helper_ret_ldul_mmu,
>      helper_ret_ldq_mmu,
>  };
>  
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index eeff6b7..27f6ee5 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -750,15 +750,24 @@ void tcg_out_tb_finalize(TCGContext *s);
>   * Memory helpers that will be used by TCG generated code.
>   */
>  #ifdef CONFIG_SOFTMMU
> -uint8_t helper_ret_ldb_mmu(CPUArchState *env, target_ulong addr,
> -                           int mmu_idx, uintptr_t retaddr);
> -uint16_t helper_ret_ldw_mmu(CPUArchState *env, target_ulong addr,
> -                            int mmu_idx, uintptr_t retaddr);
> -uint32_t helper_ret_ldl_mmu(CPUArchState *env, target_ulong addr,
> -                            int mmu_idx, uintptr_t retaddr);
> +/* Value zero-extended to tcg register size.  */
> +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
> +                                     int mmu_idx, uintptr_t retaddr);
> +tcg_target_ulong helper_ret_lduw_mmu(CPUArchState *env, target_ulong addr,
> +                                     int mmu_idx, uintptr_t retaddr);
> +tcg_target_ulong helper_ret_ldul_mmu(CPUArchState *env, target_ulong addr,
> +                                     int mmu_idx, uintptr_t retaddr);
>  uint64_t helper_ret_ldq_mmu(CPUArchState *env, target_ulong addr,
>                              int mmu_idx, uintptr_t retaddr);
>  
> +/* Value sign-extended to tcg register size.  */
> +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
> +                                     int mmu_idx, uintptr_t retaddr);
> +tcg_target_ulong helper_ret_ldsw_mmu(CPUArchState *env, target_ulong addr,
> +                                     int mmu_idx, uintptr_t retaddr);
> +tcg_target_ulong helper_ret_ldsl_mmu(CPUArchState *env, target_ulong addr,
> +                                     int mmu_idx, uintptr_t retaddr);
> +
>  void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
>                          int mmu_idx, uintptr_t retaddr);
>  void helper_ret_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,

While it works for x86 and some other architectures, it makes the
assumption that only part of the register can be used later by the TCG
code. It won't be the case if we later (and I hope we will) implement a
MIPS64 TCG target. In that case, a 32-bit value has to be returned
sign-extended, which won't be the case, for example, for a 32-bit guest
loading a 16-bit unsigned value.
Richard Henderson Aug. 30, 2013, 5:20 p.m. UTC | #2
On 08/30/2013 09:55 AM, Aurelien Jarno wrote:
> While it works for x86 and some other architectures, it makes the
> assumption that only part of the register can be used later by the TCG
> code. It won't be the case if we later (and I hope we will) implement a
> MIPS64 TCG target. In that case, a 32-bit value has to be returned
> signed extended, which won't be the case for example for a 32-bit guest
> loading a 16-bit unsigned value.

This doesn't break the MIPS64 ABI, since we'll be returning a 64-bit value, not
a 32-bit value that needs sign-extension.

Given a mips64 host with 32-bit guest, the sign-extension of the 32-bit load
can either happen by using helper_ret_ldsl_mmu in the table of helper
functions, or by using an sll insn instead of a move to put the value into
place at the end of the slow path.

I have more or less the same constraint in my as-yet unsubmitted Alpha backend.


r~
Aurelien Jarno Aug. 30, 2013, 7:12 p.m. UTC | #3
On Fri, Aug 30, 2013 at 10:20:23AM -0700, Richard Henderson wrote:
> On 08/30/2013 09:55 AM, Aurelien Jarno wrote:
> > While it works for x86 and some other architectures, it makes the
> > assumption that only part of the register can be used later by the TCG
> > code. It won't be the case if we later (and I hope we will) implement a
> > MIPS64 TCG target. In that case, a 32-bit value has to be returned
> > signed extended, which won't be the case for example for a 32-bit guest
> > loading a 16-bit unsigned value.
> 
> This doesn't break the mips64 abi, since we'll be returning a 64-bit value, not
> a 32-bit value that needs sign-extension.
> 
> Given a mips64 host with 32-bit guest, the sign-extension of the 32-bit load
> can either happen by using helper_ret_ldsl_mmu in the table of helper
> functions, or by using an sll insn instead of a move to put the value into
> place at the end of the slow path.

That's indeed a possibility. That said, while the MIPS64 ABI is then
still followed, it would have broken a MIPS backend, as the ABI between
the helper and the TCG code is broken.

I am therefore concerned that we might break some of our 64-bit
backends. x86-64 and ia64 should be fine, I don't know about aarch64,
ppc64, sparc64 or s390x.
Richard Henderson Aug. 30, 2013, 8:53 p.m. UTC | #4
On 08/30/2013 12:12 PM, Aurelien Jarno wrote:
> On Fri, Aug 30, 2013 at 10:20:23AM -0700, Richard Henderson wrote:
>> On 08/30/2013 09:55 AM, Aurelien Jarno wrote:
>>> While it works for x86 and some other architectures, it makes the
>>> assumption that only part of the register can be used later by the TCG
>>> code. It won't be the case if we later (and I hope we will) implement a
>>> MIPS64 TCG target. In that case, a 32-bit value has to be returned
>>> signed extended, which won't be the case for example for a 32-bit guest
>>> loading a 16-bit unsigned value.
>>
>> This doesn't break the mips64 abi, since we'll be returning a 64-bit value, not
>> a 32-bit value that needs sign-extension.
>>
>> Given a mips64 host with 32-bit guest, the sign-extension of the 32-bit load
>> can either happen by using helper_ret_ldsl_mmu in the table of helper
>> functions, or by using an sll insn instead of a move to put the value into
>> place at the end of the slow path.
> 
> That's indeed a possibility. That said while the MIPS64 ABI is then
> still followed, it would have break a MIPS backend as the ABI between
> the helper and the TCG code is broken.

How's that?  We're passing a value extended to tcg_target_ulong.

For the 32-bit mips backend running o32 (or even a theoretical n32 backend),
that type is uint32_t.  That gets returned from C exactly how C returns that
type.  For o32 it's the full width of the register, full stop.  For n32, it
would be returned sign-extended in the 64-bit register.

Please explain exactly the failure mode you imagine, because I don't think
there is one.

> I am therefore concerned that we might break some of our 64-bit
> backends. x86-64 and ia64 should be fine, I don't know about aarch64,
> ppc64, sparc64 or s390x. 

Nope, all 4 of those will be fine.  Not least of which because the latter 3
are still using the original helper functions, not the new helper_ret_* ones.

But certainly all 4 of those are, in the gcc sense, TRULY_NOOP_TRUNCATION
machines, meaning we can truncate to 32 bits merely by ignoring the garbage
in the high bits.  In practice it means that they have different 32-bit and
64-bit comparison instructions.


r~
Aurelien Jarno Aug. 30, 2013, 9:23 p.m. UTC | #5
On Fri, Aug 30, 2013 at 01:53:37PM -0700, Richard Henderson wrote:
> On 08/30/2013 12:12 PM, Aurelien Jarno wrote:
> > On Fri, Aug 30, 2013 at 10:20:23AM -0700, Richard Henderson wrote:
> >> On 08/30/2013 09:55 AM, Aurelien Jarno wrote:
> >>> While it works for x86 and some other architectures, it makes the
> >>> assumption that only part of the register can be used later by the TCG
> >>> code. It won't be the case if we later (and I hope we will) implement a
> >>> MIPS64 TCG target. In that case, a 32-bit value has to be returned
> >>> signed extended, which won't be the case for example for a 32-bit guest
> >>> loading a 16-bit unsigned value.
> >>
> >> This doesn't break the mips64 abi, since we'll be returning a 64-bit value, not
> >> a 32-bit value that needs sign-extension.
> >>
> >> Given a mips64 host with 32-bit guest, the sign-extension of the 32-bit load
> >> can either happen by using helper_ret_ldsl_mmu in the table of helper
> >> functions, or by using an sll insn instead of a move to put the value into
> >> place at the end of the slow path.
> > 
> > That's indeed a possibility. That said while the MIPS64 ABI is then
> > still followed, it would have break a MIPS backend as the ABI between
> > the helper and the TCG code is broken.

Sorry I meant MIPS64 backend.

> How's that?  We're passing a value extended to tcg_target_ulong.
> 
> For the 32-bit mips backend running o32 (or even a theoretical n32 backend),
> that type is uint32_t.  That gets returned from C exactly how C returns that
> type.  For o32 it's the full width of the register, full stop.  For n32, it
> would be returned sign-extended in the 64-bit register.
> 
> Please explain exactly the failure mode you imagine, because I don't think
> there is one.

For MIPS64, to load a 32-bit value in a 32-bit guest, the slow path
would have called helper_ret_ldl_mmu(), returning a uint32_t, which is then
sign-extended to 64-bit in the register as per the MIPS64 ABI.

With the new code, helper_ret_ldl_mmu() would return a tcg_target_ulong
value, so zero-extended to 64-bit. If this register is later used in a
32-bit op, it is no longer sign-extended, and the result is then
unpredictable.

As you said earlier, the solution would have been to replace
helper_ret_ldl_mmu() with helper_ret_ldsl_mmu(), but it should have been
done in the same patch to not break things. I just wanted to make
sure we don't have the problem in another target.

> > I am therefore concerned that we might break some of our 64-bit
> > backends. x86-64 and ia64 should be fine, I don't know about aarch64,
> > ppc64, sparc64 or s390x. 
> 
> Nope, all 4 of those will be fine.  Not least of which because the later 3
> are still using the original helper functions, not the new helper_ret_* ones.
> 
> But certainly all 4 of those are, in the gcc sense, TRULY_NOOP_TRUNCATION
> machines, meaning we can truncate to 32 bits merely by ignoring the garbage
> in the high bits.  In practice it means that they have different 32-bit and
> 64-bit comparison instructions.

If they are all fine, you get:

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Richard Henderson Aug. 31, 2013, 12:05 a.m. UTC | #6
On 08/30/2013 02:23 PM, Aurelien Jarno wrote:
> As you said earlier the solution would have been to replace
> helper_ret_ldl_mmu() by helper_ret_ldsl_mmu(), but it should have been
> done in the same patch to not break things. I just wanted to make
> sure we don't have the problem in an another target

Ah, but that's where we're in luck: helper_ret_ldl_mmu is brand new.
It's all of a week old, introduced with the first set of these patches.

There are no legacy users.


r~
diff mbox

Patch

diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
index f9922e2..5bbc56a 100644
--- a/include/exec/softmmu_template.h
+++ b/include/exec/softmmu_template.h
@@ -29,23 +29,39 @@ 
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define LSUFFIX q
-#define DATA_TYPE uint64_t
+#define SDATA_TYPE  int64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
 #define LSUFFIX l
-#define DATA_TYPE uint32_t
+#define SDATA_TYPE  int32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define LSUFFIX uw
-#define DATA_TYPE uint16_t
+#define SDATA_TYPE  int16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define LSUFFIX ub
-#define DATA_TYPE uint8_t
+#define SDATA_TYPE  int8_t
 #else
 #error unsupported data size
 #endif
 
+#define DATA_TYPE   glue(u, SDATA_TYPE)
+
+/* For the benefit of TCG generated code, we want to avoid the complication
+   of ABI-specific return type promotion and always return a value extended
+   to the register size of the host.  This is tcg_target_long, except in the
+   case of a 32-bit host and 64-bit data, and for that we always have
+   uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.  */
+#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8
+# define WORD_TYPE  DATA_TYPE
+# define USUFFIX    SUFFIX
+#else
+# define WORD_TYPE  tcg_target_ulong
+# define USUFFIX    glue(u, SUFFIX)
+# define SSUFFIX    glue(s, SUFFIX)
+#endif
+
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
 #define ADDR_READ addr_code
@@ -77,10 +93,10 @@  static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
 #ifdef SOFTMMU_CODE_ACCESS
 static
 #endif
-DATA_TYPE
-glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                             target_ulong addr, int mmu_idx,
-                                             uintptr_t retaddr)
+WORD_TYPE
+glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(CPUArchState *env,
+                                              target_ulong addr, int mmu_idx,
+                                              uintptr_t retaddr)
 {
     int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
@@ -126,9 +142,9 @@  glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
         addr2 = addr1 + DATA_SIZE;
         /* Note the adjustment at the beginning of the function.
            Undo that for the recursion.  */
-        res1 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)
+        res1 = glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
             (env, addr1, mmu_idx, retaddr + GETPC_ADJ);
-        res2 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)
+        res2 = glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
             (env, addr2, mmu_idx, retaddr + GETPC_ADJ);
         shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -147,19 +163,33 @@  glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
 #endif
 
     haddr = addr + env->tlb_table[mmu_idx][index].addend;
-    return glue(glue(ld, LSUFFIX), _raw)((uint8_t *)haddr);
+    /* Note that ldl_raw is defined with type "int".  */
+    return (DATA_TYPE) glue(glue(ld, LSUFFIX), _raw)((uint8_t *)haddr);
 }
 
 DATA_TYPE
 glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          int mmu_idx)
 {
-    return glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
+    return glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
                                                         GETRA_EXT());
 }
 
 #ifndef SOFTMMU_CODE_ACCESS
 
+/* Provide signed versions of the load routines as well.  We can of course
+   avoid this for 64-bit data, or for 32-bit data on 32-bit host.  */
+#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
+WORD_TYPE
+glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)(CPUArchState *env,
+                                              target_ulong addr, int mmu_idx,
+                                              uintptr_t retaddr)
+{
+    return (SDATA_TYPE) glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
+        (env, addr, mmu_idx, retaddr);
+}
+#endif
+
 static inline void glue(io_write, SUFFIX)(CPUArchState *env,
                                           hwaddr physaddr,
                                           DATA_TYPE val,
@@ -267,3 +297,7 @@  glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
 #undef LSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
+#undef WORD_TYPE
+#undef SDATA_TYPE
+#undef USUFFIX
+#undef SSUFFIX
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4c98cc4..5aee0fa 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1026,9 +1026,9 @@  static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
  *                                     int mmu_idx, uintptr_t ra)
  */
 static const void * const qemu_ld_helpers[4] = {
-    helper_ret_ldb_mmu,
-    helper_ret_ldw_mmu,
-    helper_ret_ldl_mmu,
+    helper_ret_ldub_mmu,
+    helper_ret_lduw_mmu,
+    helper_ret_ldul_mmu,
     helper_ret_ldq_mmu,
 };
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index eeff6b7..27f6ee5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -750,15 +750,24 @@  void tcg_out_tb_finalize(TCGContext *s);
  * Memory helpers that will be used by TCG generated code.
  */
 #ifdef CONFIG_SOFTMMU
-uint8_t helper_ret_ldb_mmu(CPUArchState *env, target_ulong addr,
-                           int mmu_idx, uintptr_t retaddr);
-uint16_t helper_ret_ldw_mmu(CPUArchState *env, target_ulong addr,
-                            int mmu_idx, uintptr_t retaddr);
-uint32_t helper_ret_ldl_mmu(CPUArchState *env, target_ulong addr,
-                            int mmu_idx, uintptr_t retaddr);
+/* Value zero-extended to tcg register size.  */
+tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
+                                     int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_ret_lduw_mmu(CPUArchState *env, target_ulong addr,
+                                     int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_ret_ldul_mmu(CPUArchState *env, target_ulong addr,
+                                     int mmu_idx, uintptr_t retaddr);
 uint64_t helper_ret_ldq_mmu(CPUArchState *env, target_ulong addr,
                             int mmu_idx, uintptr_t retaddr);
 
+/* Value sign-extended to tcg register size.  */
+tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
+                                     int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_ret_ldsw_mmu(CPUArchState *env, target_ulong addr,
+                                     int mmu_idx, uintptr_t retaddr);
+tcg_target_ulong helper_ret_ldsl_mmu(CPUArchState *env, target_ulong addr,
+                                     int mmu_idx, uintptr_t retaddr);
+
 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                         int mmu_idx, uintptr_t retaddr);
 void helper_ret_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,