diff mbox

[RFC,v3,2/3] tcg: Add declarations and templates of extended MMU helpers

Message ID 1342261414-6069-3-git-send-email-yeongkyoon.lee@samsung.com
State New
Headers show

Commit Message

YeongKyoon Lee July 14, 2012, 10:23 a.m. UTC
Add declarations and templates of extended MMU helpers.
An extended helper takes one additional argument: the host address of the
generated code that accesses guest memory. This address differs from the
address of the helper's call site, because helper call sites are located at
the end of a generated code block.
---
 softmmu_defs.h     |   64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 softmmu_header.h   |   31 +++++++++++++++++++++++++
 softmmu_template.h |   52 +++++++++++++++++++++++++++++++++++------
 3 files changed, 139 insertions(+), 8 deletions(-)

Comments

Blue Swirl July 14, 2012, 1:08 p.m. UTC | #1
On Sat, Jul 14, 2012 at 10:23 AM, Yeongkyoon Lee
<yeongkyoon.lee@samsung.com> wrote:
> Add declarations and templates of extended MMU helpers.
> An extended helper takes an additional argument of the host address accessing
> a guest memory which differs from the address of the call site to the helper
> because helper call sites locate at the end of a generated code block.
> ---
>  softmmu_defs.h     |   64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  softmmu_header.h   |   31 +++++++++++++++++++++++++
>  softmmu_template.h |   52 +++++++++++++++++++++++++++++++++++------
>  3 files changed, 139 insertions(+), 8 deletions(-)
>
> diff --git a/softmmu_defs.h b/softmmu_defs.h
> index 8d59f9d..505f6ba 100644
> --- a/softmmu_defs.h
> +++ b/softmmu_defs.h
> @@ -10,6 +10,8 @@
>  #define SOFTMMU_DEFS_H
>
>  #ifndef CONFIG_TCG_PASS_AREG0
> +
> +#ifndef CONFIG_QEMU_LDST_OPTIMIZATION
>  uint8_t __ldb_mmu(target_ulong addr, int mmu_idx);
>  void __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
>  uint16_t __ldw_mmu(target_ulong addr, int mmu_idx);
> @@ -28,6 +30,30 @@ void __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
>  uint64_t __ldq_cmmu(target_ulong addr, int mmu_idx);
>  void __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
>  #else
> +/* Extended versions of MMU helpers for qemu_ld/st optimization.
> +   The additional argument is a host code address accessing guest memory */
> +uint8_t ext_ldb_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
> +void ext_stb_mmu(target_ulong addr, uint8_t val, int mmu_idx, uintptr_t ra);
> +uint16_t ext_ldw_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
> +void ext_stw_mmu(target_ulong addr, uint16_t val, int mmu_idx, uintptr_t ra);
> +uint32_t ext_ldl_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
> +void ext_stl_mmu(target_ulong addr, uint32_t val, int mmu_idx, uintptr_t ra);
> +uint64_t ext_ldq_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
> +void ext_stq_mmu(target_ulong addr, uint64_t val, int mmu_idx, uintptr_t ra);
> +
> +uint8_t ext_ldb_cmmu(target_ulong addr, int mmu_idx, uintptr_t ra);
> +void ext_stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx, uintptr_t ra);
> +uint16_t ext_ldw_cmmu(target_ulong addr, int mmu_idx, uintptr_t ra);
> +void ext_stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx, uintptr_t ra);
> +uint32_t ext_ldl_cmmu(target_ulong addr, int mmu_idx, uintptr_t ra);
> +void ext_stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx, uintptr_t ra);
> +uint64_t ext_ldq_cmmu(target_ulong addr, int mmu_idx, uintptr_t ra);
> +void ext_stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx, uintptr_t ra);
> +#endif  /* !CONFIG_QEMU_LDST_OPTIMIZATION */
> +
> +#else
> +
> +#ifndef CONFIG_QEMU_LDST_OPTIMIZATION
>  uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
>  void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
>                      int mmu_idx);
> @@ -53,6 +79,44 @@ void helper_stl_cmmu(CPUArchState *env, target_ulong addr, uint32_t val,
>  uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
>  void helper_stq_cmmu(CPUArchState *env, target_ulong addr, uint64_t val,
>                       int mmu_idx);
> +#else
> +/* Extended versions of MMU helpers for qemu_ld/st optimization.
> +   The additional argument is a host code address accessing guest memory */
> +uint8_t ext_helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx,
> +                           uintptr_t ra);
> +void ext_helper_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
> +                        int mmu_idx, uintptr_t ra);
> +uint16_t ext_helper_ldw_mmu(CPUArchState *env, target_ulong addr, int mmu_idx,
> +                            uintptr_t ra);
> +void ext_helper_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
> +                        int mmu_idx, uintptr_t ra);
> +uint32_t ext_helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx,
> +                            uintptr_t ra);
> +void ext_helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
> +                        int mmu_idx, uintptr_t ra);
> +uint64_t ext_helper_ldq_mmu(CPUArchState *env, target_ulong addr, int mmu_idx,
> +                            uintptr_t ra);
> +void ext_helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
> +                        int mmu_idx, uintptr_t ra);
> +
> +uint8_t ext_helper_ldb_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx,
> +                            uintptr_t ra);
> +void ext_helper_stb_cmmu(CPUArchState *env, target_ulong addr, uint8_t val,
> +                         int mmu_idx, uintptr_t ra);
> +uint16_t ext_helper_ldw_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx,
> +                             uintptr_t ra);
> +void ext_helper_stw_cmmu(CPUArchState *env, target_ulong addr, uint16_t val,
> +                         int mmu_idx, uintptr_t ra);
> +uint32_t ext_helper_ldl_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx,
> +                             uintptr_t ra);
> +void ext_helper_stl_cmmu(CPUArchState *env, target_ulong addr, uint32_t val,
> +                         int mmu_idx, uintptr_t ra);
> +uint64_t ext_helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx,
> +                             uintptr_t ra);
> +void ext_helper_stq_cmmu(CPUArchState *env, target_ulong addr, uint64_t val,
> +                         int mmu_idx, uintptr_t ra);
> +#endif  /* !CONFIG_QEMU_LDST_OPTIMIZATION */
> +
>  #endif
>
>  #endif
> diff --git a/softmmu_header.h b/softmmu_header.h
> index cf1aa38..07852c4 100644
> --- a/softmmu_header.h
> +++ b/softmmu_header.h
> @@ -82,12 +82,20 @@
>  #define ENV_PARAM
>  #define ENV_VAR
>  #define CPU_PREFIX
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +#define HELPER_PREFIX ext_
> +#else
>  #define HELPER_PREFIX __
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>  #else
>  #define ENV_PARAM CPUArchState *env,
>  #define ENV_VAR env,
>  #define CPU_PREFIX cpu_
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +#define HELPER_PREFIX ext_helper_
> +#else
>  #define HELPER_PREFIX helper_
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>  #endif
>
>  /* generic load/store macros */
> @@ -106,9 +114,16 @@ glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
>      mmu_idx = CPU_MMU_INDEX;
>      if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
>                   (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +        /* XXX: This macro branching is due to checkpatch.pl which
> +           doesn't allow "#define RET_VAR , (uintptr_t)NULL" */

Just do it, checkpatch.pl is not infallible.

> +        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
> +                   MMUSUFFIX)(ENV_VAR addr, mmu_idx, (uintptr_t)NULL);
> +#else
>          res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
>                                                                       addr,
>                                                                       mmu_idx);
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>      } else {
>          uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
>          res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
> @@ -130,8 +145,16 @@ glue(glue(glue(CPU_PREFIX, lds), SUFFIX), MEMSUFFIX)(ENV_PARAM
>      mmu_idx = CPU_MMU_INDEX;
>      if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
>                   (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +        /* XXX: This macro branching is due to checkpatch.pl which
> +           doesn't allow "#define RET_VAR , (uintptr_t)NULL" */
> +        res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
> +                               MMUSUFFIX)(ENV_VAR addr, mmu_idx,
> +                                          (uintptr_t)NULL);
> +#else
>          res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
>                                 MMUSUFFIX)(ENV_VAR addr, mmu_idx);
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>      } else {
>          uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
>          res = glue(glue(lds, SUFFIX), _raw)(hostaddr);
> @@ -157,8 +180,16 @@ glue(glue(glue(CPU_PREFIX, st), SUFFIX), MEMSUFFIX)(ENV_PARAM target_ulong ptr,
>      mmu_idx = CPU_MMU_INDEX;
>      if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
>                   (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +        /* XXX: This macro branching is due to checkpatch.pl which
> +           doesn't allow "#define RET_VAR , (uintptr_t)NULL" */
> +        glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
> +                                                               mmu_idx,
> +                                                               (uintptr_t)NULL);
> +#else
>          glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
>                                                                 mmu_idx);
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>      } else {
>          uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
>          glue(glue(st, SUFFIX), _raw)(hostaddr, v);
> diff --git a/softmmu_template.h b/softmmu_template.h
> index b8bd700..5096c63 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -66,6 +66,21 @@
>  #define HELPER_PREFIX helper_
>  #endif
>
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +#undef HELPER_PREFIX
> +/* Redefine helper prefix */
> +#ifndef CONFIG_TCG_PASS_AREG0
> +#define HELPER_PREFIX ext_
> +#else
> +#define HELPER_PREFIX ext_helper_
> +#endif
> +/* An extended MMU helper takes one more argument which is
> +   a host address of generated code accessing guest memory */
> +#define GET_RET_ADDR() ra
> +#else
> +#define GET_RET_ADDR() GETPC()
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
> +
>  static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
>                                                          target_ulong addr,
>                                                          int mmu_idx,
> @@ -103,10 +118,20 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM
>  }
>
>  /* handle all cases except unaligned access which span two pages */
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +/* XXX: This macro branching is due to checkpatch.pl which doesn't allow
> +   "#define RET_PARAM , uintptr_r ra" */
> +DATA_TYPE
> +glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
> +                                                       target_ulong addr,
> +                                                       int mmu_idx,
> +                                                       uintptr_t ra)
> +#else
>  DATA_TYPE
>  glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
>                                                         target_ulong addr,
>                                                         int mmu_idx)
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>  {
>      DATA_TYPE res;
>      int index;
> @@ -124,13 +149,13 @@ glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
>              /* IO access */
>              if ((addr & (DATA_SIZE - 1)) != 0)
>                  goto do_unaligned_access;
> -            retaddr = GETPC();
> +            retaddr = GET_RET_ADDR();
>              ioaddr = env->iotlb[mmu_idx][index];
>              res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>              /* slow unaligned access (it spans two pages or IO) */
>          do_unaligned_access:
> -            retaddr = GETPC();
> +            retaddr = GET_RET_ADDR();
>  #ifdef ALIGNED_ONLY
>              do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
>  #endif
> @@ -141,7 +166,7 @@ glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
>              uintptr_t addend;
>  #ifdef ALIGNED_ONLY
>              if ((addr & (DATA_SIZE - 1)) != 0) {
> -                retaddr = GETPC();
> +                retaddr = GET_RET_ADDR();
>                  do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
>              }
>  #endif
> @@ -151,7 +176,7 @@ glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
>          }
>      } else {
>          /* the page is not in the TLB : fill it */
> -        retaddr = GETPC();
> +        retaddr = GET_RET_ADDR();
>  #ifdef ALIGNED_ONLY
>          if ((addr & (DATA_SIZE - 1)) != 0)
>              do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
> @@ -253,10 +278,20 @@ static inline void glue(io_write, SUFFIX)(ENV_PARAM
>  #endif /* SHIFT > 2 */
>  }
>
> +#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
> +/* XXX: This macro branching is due to checkpatch.pl which doesn't allow
> +   "#define RET_PARAM , uintptr_t ra" */
> +void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
> +                                                            target_ulong addr,
> +                                                            DATA_TYPE val,
> +                                                            int mmu_idx,
> +                                                            uintptr_t ra)
> +#else
>  void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
>                                                              target_ulong addr,
>                                                              DATA_TYPE val,
>                                                              int mmu_idx)
> +#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
>  {
>      target_phys_addr_t ioaddr;
>      target_ulong tlb_addr;
> @@ -271,12 +306,12 @@ void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
>              /* IO access */
>              if ((addr & (DATA_SIZE - 1)) != 0)
>                  goto do_unaligned_access;
> -            retaddr = GETPC();
> +            retaddr = GET_RET_ADDR();
>              ioaddr = env->iotlb[mmu_idx][index];
>              glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>          do_unaligned_access:
> -            retaddr = GETPC();
> +            retaddr = GET_RET_ADDR();
>  #ifdef ALIGNED_ONLY
>              do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
>  #endif
> @@ -287,7 +322,7 @@ void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
>              uintptr_t addend;
>  #ifdef ALIGNED_ONLY
>              if ((addr & (DATA_SIZE - 1)) != 0) {
> -                retaddr = GETPC();
> +                retaddr = GET_RET_ADDR();
>                  do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
>              }
>  #endif
> @@ -297,7 +332,7 @@ void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
>          }
>      } else {
>          /* the page is not in the TLB : fill it */
> -        retaddr = GETPC();
> +        retaddr = GET_RET_ADDR();
>  #ifdef ALIGNED_ONLY
>          if ((addr & (DATA_SIZE - 1)) != 0)
>              do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
> @@ -370,3 +405,4 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
>  #undef ENV_VAR
>  #undef CPU_PREFIX
>  #undef HELPER_PREFIX
> +#undef GET_RET_ADDR
> --
> 1.7.4.1
>
diff mbox

Patch

diff --git a/softmmu_defs.h b/softmmu_defs.h
index 8d59f9d..505f6ba 100644
--- a/softmmu_defs.h
+++ b/softmmu_defs.h
@@ -10,6 +10,8 @@ 
 #define SOFTMMU_DEFS_H
 
 #ifndef CONFIG_TCG_PASS_AREG0
+
+#ifndef CONFIG_QEMU_LDST_OPTIMIZATION
 uint8_t __ldb_mmu(target_ulong addr, int mmu_idx);
 void __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
 uint16_t __ldw_mmu(target_ulong addr, int mmu_idx);
@@ -28,6 +30,30 @@  void __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
 uint64_t __ldq_cmmu(target_ulong addr, int mmu_idx);
 void __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
 #else
+/* Extended versions of MMU helpers for qemu_ld/st optimization.  The extra
+   argument (ra) is the host address of the code that accesses guest memory. */
+uint8_t ext_ldb_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
+void ext_stb_mmu(target_ulong addr, uint8_t val, int mmu_idx, uintptr_t ra);
+uint16_t ext_ldw_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
+void ext_stw_mmu(target_ulong addr, uint16_t val, int mmu_idx, uintptr_t ra);
+uint32_t ext_ldl_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
+void ext_stl_mmu(target_ulong addr, uint32_t val, int mmu_idx, uintptr_t ra);
+uint64_t ext_ldq_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
+void ext_stq_mmu(target_ulong addr, uint64_t val, int mmu_idx, uintptr_t ra);
+
+uint8_t ext_ldb_cmmu(target_ulong addr, int mmu_idx, uintptr_t ra);
+void ext_stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx, uintptr_t ra);
+uint16_t ext_ldw_cmmu(target_ulong addr, int mmu_idx, uintptr_t ra);
+void ext_stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx, uintptr_t ra);
+uint32_t ext_ldl_cmmu(target_ulong addr, int mmu_idx, uintptr_t ra);
+void ext_stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx, uintptr_t ra);
+uint64_t ext_ldq_cmmu(target_ulong addr, int mmu_idx, uintptr_t ra);
+void ext_stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx, uintptr_t ra);
+#endif  /* !CONFIG_QEMU_LDST_OPTIMIZATION */
+
+#else
+
+#ifndef CONFIG_QEMU_LDST_OPTIMIZATION
 uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                     int mmu_idx);
@@ -53,6 +79,44 @@  void helper_stl_cmmu(CPUArchState *env, target_ulong addr, uint32_t val,
 uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 void helper_stq_cmmu(CPUArchState *env, target_ulong addr, uint64_t val,
                      int mmu_idx);
+#else
+/* Extended versions of MMU helpers for qemu_ld/st optimization.  The extra
+   argument (ra) is the host address of the code that accesses guest memory. */
+uint8_t ext_helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx,
+                           uintptr_t ra);
+void ext_helper_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
+                        int mmu_idx, uintptr_t ra);
+uint16_t ext_helper_ldw_mmu(CPUArchState *env, target_ulong addr, int mmu_idx,
+                            uintptr_t ra);
+void ext_helper_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+                        int mmu_idx, uintptr_t ra);
+uint32_t ext_helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx,
+                            uintptr_t ra);
+void ext_helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+                        int mmu_idx, uintptr_t ra);
+uint64_t ext_helper_ldq_mmu(CPUArchState *env, target_ulong addr, int mmu_idx,
+                            uintptr_t ra);
+void ext_helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+                        int mmu_idx, uintptr_t ra);
+
+uint8_t ext_helper_ldb_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx,
+                            uintptr_t ra);
+void ext_helper_stb_cmmu(CPUArchState *env, target_ulong addr, uint8_t val,
+                         int mmu_idx, uintptr_t ra);
+uint16_t ext_helper_ldw_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx,
+                             uintptr_t ra);
+void ext_helper_stw_cmmu(CPUArchState *env, target_ulong addr, uint16_t val,
+                         int mmu_idx, uintptr_t ra);
+uint32_t ext_helper_ldl_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx,
+                             uintptr_t ra);
+void ext_helper_stl_cmmu(CPUArchState *env, target_ulong addr, uint32_t val,
+                         int mmu_idx, uintptr_t ra);
+uint64_t ext_helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx,
+                             uintptr_t ra);
+void ext_helper_stq_cmmu(CPUArchState *env, target_ulong addr, uint64_t val,
+                         int mmu_idx, uintptr_t ra);
+#endif  /* !CONFIG_QEMU_LDST_OPTIMIZATION */
+
 #endif
 
 #endif
diff --git a/softmmu_header.h b/softmmu_header.h
index cf1aa38..07852c4 100644
--- a/softmmu_header.h
+++ b/softmmu_header.h
@@ -82,12 +82,20 @@ 
 #define ENV_PARAM
 #define ENV_VAR
 #define CPU_PREFIX
+#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
+#define HELPER_PREFIX ext_
+#else
 #define HELPER_PREFIX __
+#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
 #else
 #define ENV_PARAM CPUArchState *env,
 #define ENV_VAR env,
 #define CPU_PREFIX cpu_
+#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
+#define HELPER_PREFIX ext_helper_
+#else
 #define HELPER_PREFIX helper_
+#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
 #endif
 
 /* generic load/store macros */
@@ -106,9 +114,16 @@  glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
+#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
+        /* XXX: This macro branching is due to checkpatch.pl which
+           doesn't allow "#define RET_VAR , (uintptr_t)NULL" */
+        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
+                   MMUSUFFIX)(ENV_VAR addr, mmu_idx, (uintptr_t)NULL);
+#else
         res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
                                                                      addr,
                                                                      mmu_idx);
+#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
@@ -130,8 +145,16 @@  glue(glue(glue(CPU_PREFIX, lds), SUFFIX), MEMSUFFIX)(ENV_PARAM
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
+#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
+        /* XXX: This macro branching is due to checkpatch.pl which
+           doesn't allow "#define RET_VAR , (uintptr_t)NULL" */
+        res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
+                               MMUSUFFIX)(ENV_VAR addr, mmu_idx,
+                                          (uintptr_t)NULL);
+#else
         res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
                                MMUSUFFIX)(ENV_VAR addr, mmu_idx);
+#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(lds, SUFFIX), _raw)(hostaddr);
@@ -157,8 +180,16 @@  glue(glue(glue(CPU_PREFIX, st), SUFFIX), MEMSUFFIX)(ENV_PARAM target_ulong ptr,
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
+#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
+        /* XXX: This macro branching is due to checkpatch.pl which
+           doesn't allow "#define RET_VAR , (uintptr_t)NULL" */
+        glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
+                                                               mmu_idx,
+                                                               (uintptr_t)NULL);
+#else
         glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
                                                                mmu_idx);
+#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         glue(glue(st, SUFFIX), _raw)(hostaddr, v);
diff --git a/softmmu_template.h b/softmmu_template.h
index b8bd700..5096c63 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -66,6 +66,21 @@ 
 #define HELPER_PREFIX helper_
 #endif
 
+#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
+#undef HELPER_PREFIX
+/* Redefine helper prefix */
+#ifndef CONFIG_TCG_PASS_AREG0
+#define HELPER_PREFIX ext_
+#else
+#define HELPER_PREFIX ext_helper_
+#endif
+/* An extended MMU helper takes one more argument which is
+   a host address of generated code accessing guest memory */
+#define GET_RET_ADDR() ra
+#else
+#define GET_RET_ADDR() GETPC()
+#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
                                                         target_ulong addr,
                                                         int mmu_idx,
@@ -103,10 +118,20 @@  static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM
 }
 
 /* handle all cases except unaligned access which span two pages */
+#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
+/* XXX: This macro branching is due to checkpatch.pl which doesn't allow
+   "#define RET_PARAM , uintptr_t ra" */
+DATA_TYPE
+glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                       target_ulong addr,
+                                                       int mmu_idx,
+                                                       uintptr_t ra)
+#else
 DATA_TYPE
 glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
                                                        target_ulong addr,
                                                        int mmu_idx)
+#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
 {
     DATA_TYPE res;
     int index;
@@ -124,13 +149,13 @@  glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
             /* IO access */
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
-            retaddr = GETPC();
+            retaddr = GET_RET_ADDR();
             ioaddr = env->iotlb[mmu_idx][index];
             res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
-            retaddr = GETPC();
+            retaddr = GET_RET_ADDR();
 #ifdef ALIGNED_ONLY
             do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
@@ -141,7 +166,7 @@  glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
             uintptr_t addend;
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
-                retaddr = GETPC();
+                retaddr = GET_RET_ADDR();
                 do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
             }
 #endif
@@ -151,7 +176,7 @@  glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
         }
     } else {
         /* the page is not in the TLB : fill it */
-        retaddr = GETPC();
+        retaddr = GET_RET_ADDR();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
             do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
@@ -253,10 +278,20 @@  static inline void glue(io_write, SUFFIX)(ENV_PARAM
 #endif /* SHIFT > 2 */
 }
 
+#ifdef CONFIG_QEMU_LDST_OPTIMIZATION
+/* XXX: This macro branching is due to checkpatch.pl which doesn't allow
+   "#define RET_PARAM , uintptr_t ra" */
+void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                            target_ulong addr,
+                                                            DATA_TYPE val,
+                                                            int mmu_idx,
+                                                            uintptr_t ra)
+#else
 void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
                                                             target_ulong addr,
                                                             DATA_TYPE val,
                                                             int mmu_idx)
+#endif  /* CONFIG_QEMU_LDST_OPTIMIZATION */
 {
     target_phys_addr_t ioaddr;
     target_ulong tlb_addr;
@@ -271,12 +306,12 @@  void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
             /* IO access */
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
-            retaddr = GETPC();
+            retaddr = GET_RET_ADDR();
             ioaddr = env->iotlb[mmu_idx][index];
             glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
-            retaddr = GETPC();
+            retaddr = GET_RET_ADDR();
 #ifdef ALIGNED_ONLY
             do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
@@ -287,7 +322,7 @@  void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
             uintptr_t addend;
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
-                retaddr = GETPC();
+                retaddr = GET_RET_ADDR();
                 do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
             }
 #endif
@@ -297,7 +332,7 @@  void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
         }
     } else {
         /* the page is not in the TLB : fill it */
-        retaddr = GETPC();
+        retaddr = GET_RET_ADDR();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
             do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
@@ -370,3 +405,4 @@  static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
 #undef ENV_VAR
 #undef CPU_PREFIX
 #undef HELPER_PREFIX
+#undef GET_RET_ADDR