Patchwork [1/7] exec: Reorganize the GETRA/GETPC macros

login
register
mail settings
Submitter Richard Henderson
Date Aug. 27, 2013, 9:46 p.m.
Message ID <1377639991-16028-2-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/270258/
State New
Headers show

Comments

Richard Henderson - Aug. 27, 2013, 9:46 p.m.
Always define GETRA; use __builtin_extract_return_addr, rather than
having a special case for s390.  Split GETPC_ADJ out of GETPC; use 2
universally, rather than having a special case for arm.

Rename GETPC_LDST to GETRA_LDST to indicate that it does not
contain the GETPC_ADJ value.  Likewise with GETPC_EXT to GETRA_EXT.

Perform the GETPC_ADJ adjustment inside helper_ret_ld/st.  This will
allow backends to pass along the "true" return address rather than
the massaged GETPC value.  In the meantime, double application of
GETPC_ADJ does not hurt, since the call insn in all ISAs is at least
4 bytes long.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/exec/exec-all.h         | 87 +++++++++++++++++++----------------------
 include/exec/softmmu_template.h | 24 ++++++++----
 2 files changed, 58 insertions(+), 53 deletions(-)

Patch

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index b70028a..32d0204 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -295,49 +295,42 @@  static inline void tb_add_jump(TranslationBlock *tb, int n,
     }
 }
 
-/* The return address may point to the start of the next instruction.
-   Subtracting one gets us the call instruction itself.  */
+/* GETRA is the true target of the return instruction that we'll execute,
+   defined here for simplicity of defining the follow-up macros.  */
 #if defined(CONFIG_TCG_INTERPRETER)
 extern uintptr_t tci_tb_ptr;
-# define GETPC() tci_tb_ptr
-#elif defined(__s390__) && !defined(__s390x__)
-# define GETPC() \
-    (((uintptr_t)__builtin_return_address(0) & 0x7fffffffUL) - 1)
-#elif defined(__arm__)
-/* Thumb return addresses have the low bit set, so we need to subtract two.
-   This is still safe in ARM mode because instructions are 4 bytes.  */
-# define GETPC() ((uintptr_t)__builtin_return_address(0) - 2)
+# define GETRA() tci_tb_ptr
+#else
+# define GETRA() \
+    ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0)))
+#endif
+
+/* The true return address will often point to a host insn that is part of
+   the next translated guest insn.  Adjust the address backward to point to
+   the middle of the call insn.  Subtracting one would do the job except for
+   several compressed mode architectures (arm, mips) which set the low bit
+   to indicate the compressed mode; subtracting two works around that.  It
+   is also the case that there are no host ISAs that contain a call insn
+   smaller than 4 bytes, so we don't worry about special-casing this.  */
+#if defined(CONFIG_TCG_INTERPRETER)
+# define GETPC_ADJ   0
 #else
-# define GETPC() ((uintptr_t)__builtin_return_address(0) - 1)
+# define GETPC_ADJ   2
 #endif
 
+#define GETPC()  (GETRA() - GETPC_ADJ)
+
+/* The LDST optimization splits code generation into fast and slow paths.
+   In some implementations, we pass the "logical" return address manually;
+   in others, we must infer the logical return from the true return.  */
 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-/* qemu_ld/st optimization split code generation to fast and slow path, thus,
-   it needs special handling for an MMU helper which is called from the slow
-   path, to get the fast path's pc without any additional argument.
-   It uses a tricky solution which embeds the fast path pc into the slow path.
-
-   Code flow in slow path:
-   (1) pre-process
-   (2) call MMU helper
-   (3) jump to (5)
-   (4) fast path information (implementation specific)
-   (5) post-process (e.g. stack adjust)
-   (6) jump to corresponding code of the next of fast path
- */
-# if defined(__i386__) || defined(__x86_64__)
-#  define GETRA() ((uintptr_t)__builtin_return_address(0))
-/* The return address argument for ldst is passed directly.  */
-#  define GETPC_LDST()  (abort(), 0)
-# elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
-#  define GETRA() ((uintptr_t)__builtin_return_address(0))
-#  define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() - 4)) - 1))
+# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
+#  define GETRA_LDST(RA)   (*(int32_t *)((RA) - 4))
 # elif defined(__arm__)
 /* We define two insns between the return address and the branch back to
    straight-line.  Find and decode that branch insn.  */
-#  define GETRA()       ((uintptr_t)__builtin_return_address(0))
-#  define GETPC_LDST()  tcg_getpc_ldst(GETRA())
-static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
+#  define GETRA_LDST(RA)   tcg_getra_ldst(RA)
+static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
 {
     int32_t b;
     ra += 8;                    /* skip the two insns */
@@ -345,31 +338,33 @@  static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
     b = (b << 8) >> (8 - 2);    /* extract the displacement */
     ra += 8;                    /* branches are relative to pc+8 */
     ra += b;                    /* apply the displacement */
-    ra -= 4;                    /* return a pointer into the current opcode,
-                                   not the start of the next opcode  */
     return ra;
 }
-#elif defined(__aarch64__)
-#  define GETRA()       ((uintptr_t)__builtin_return_address(0))
-#  define GETPC_LDST()  tcg_getpc_ldst(GETRA())
-static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
+# elif defined(__aarch64__)
+#  define GETRA_LDST(RA)  tcg_getra_ldst(RA)
+static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
 {
     int32_t b;
     ra += 4;                    /* skip one instruction */
     b = *(int32_t *)ra;         /* load the branch insn */
     b = (b << 6) >> (6 - 2);    /* extract the displacement */
     ra += b;                    /* apply the displacement  */
-    ra -= 4;                    /* return a pointer into the current opcode,
-                                   not the start of the next opcode  */
     return ra;
 }
-# else
-#  error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!"
 # endif
+#endif /* CONFIG_QEMU_LDST_OPTIMIZATION */
+
+/* ??? This declaration really ought to be in tcg.h.  */
 bool is_tcg_gen_code(uintptr_t pc_ptr);
-# define GETPC_EXT() (is_tcg_gen_code(GETRA()) ? GETPC_LDST() : GETPC())
+
+#ifdef GETRA_LDST
+# define GETRA_EXT()  tcg_getra_ext(GETRA())
+static inline uintptr_t  tcg_getra_ext(uintptr_t ra)
+{
+    return is_tcg_gen_code(ra) ? GETRA_LDST(ra) : ra;
+}
 #else
-# define GETPC_EXT() GETPC()
+# define GETRA_EXT()  GETRA()
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
index eaca9e1..2fc6ea3 100644
--- a/include/exec/softmmu_template.h
+++ b/include/exec/softmmu_template.h
@@ -86,6 +86,9 @@  glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
     uintptr_t haddr;
 
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+
     /* If the TLB entry is for a different page, reload and try again.  */
     if ((addr & TARGET_PAGE_MASK)
          != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
@@ -121,10 +124,12 @@  glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
 #endif
         addr1 = addr & ~(DATA_SIZE - 1);
         addr2 = addr1 + DATA_SIZE;
-        res1 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr1,
-                                                            mmu_idx, retaddr);
-        res2 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr2,
-                                                            mmu_idx, retaddr);
+        /* Note the adjustment at the beginning of the function.
+           Undo that for the recursion.  */
+        res1 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)
+            (env, addr1, mmu_idx, retaddr + GETPC_ADJ);
+        res2 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)
+            (env, addr2, mmu_idx, retaddr + GETPC_ADJ);
         shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
         res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
@@ -150,7 +155,7 @@  glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          int mmu_idx)
 {
     return glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
-                                                        GETPC_EXT());
+                                                        GETRA_EXT());
 }
 
 #ifndef SOFTMMU_CODE_ACCESS
@@ -182,6 +187,9 @@  glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
     target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     uintptr_t haddr;
 
+    /* Adjust the given return address.  */
+    retaddr -= GETPC_ADJ;
+
     /* If the TLB entry is for a different page, reload and try again.  */
     if ((addr & TARGET_PAGE_MASK)
         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
@@ -223,8 +231,10 @@  glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
 #else
             uint8_t val8 = val >> (i * 8);
 #endif
+            /* Note the adjustment at the beginning of the function.
+               Undo that for the recursion.  */
             glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
-                                            mmu_idx, retaddr);
+                                            mmu_idx, retaddr + GETPC_ADJ);
         }
         return;
     }
@@ -245,7 +255,7 @@  glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
                                          DATA_TYPE val, int mmu_idx)
 {
     glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, val, mmu_idx,
-                                                 GETPC_EXT());
+                                                 GETRA_EXT());
 }
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */