diff mbox

[RESEND,v6,2/3] tcg: Add extended GETPC mechanism for MMU helpers with ldst optimization

Message ID 1350716743-2812-3-git-send-email-yeongkyoon.lee@samsung.com
State New
Headers show

Commit Message

YeongKyoon Lee Oct. 20, 2012, 7:05 a.m. UTC
Add GETPC_EXT, which MMU helpers use to calculate the address of the code that
accesses guest memory, choosing the method according to whether the helper was
called from qemu_ld/st optimized code or from a C function. Currently, only
i386 and x86-64 hosts are supported.

Signed-off-by: Yeongkyoon Lee <yeongkyoon.lee@samsung.com>

---
 exec-all.h         |   36 ++++++++++++++++++++++++++++++++++++
 exec.c             |   11 +++++++++++
 softmmu_template.h |   16 ++++++++--------
 3 files changed, 55 insertions(+), 8 deletions(-)

--
1.7.5.4

Comments

陳韋任 Oct. 23, 2012, 3:25 p.m. UTC | #1
Hi Yeongkyoon,

> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
> +/* check whether the give addr is in TCG generated code buffer or not */
                        ^^^^

  should be given.

Cheers,
chenwj
YeongKyoon Lee Oct. 24, 2012, 3:11 a.m. UTC | #2
On 2012년 10월 24일 00:25, 陳韋任 (Wei-Ren Chen) wrote:
> Hi Yeongkyoon,
>
>> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
>> +/* check whether the give addr is in TCG generated code buffer or not */
>                          ^^^^
>
>    should be given.
>
> Cheers,
> chenwj
>

Thanks a lot, Wei-Ren.
Probably I should send the next version of the patch.
陳韋任 Oct. 24, 2012, 3:14 a.m. UTC | #3
On Wed, Oct 24, 2012 at 12:11:23PM +0900, Yeongkyoon Lee wrote:
> On 2012년 10월 24일 00:25, 陳韋任 (Wei-Ren Chen) wrote:
> > Hi Yeongkyoon,
> >
> >> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
> >> +/* check whether the give addr is in TCG generated code buffer or not */
> >                          ^^^^
> >
> >    should be given.
> >
> > Cheers,
> > chenwj
> >
> 
> Thanks a lot, Wei-Ren.
> Probably I should send the next version of the patch.

  Or someone who commits this patchset for you can fix typo so that
you don't have to resend it. ;)

Cheers,
chenwj
Blue Swirl Oct. 27, 2012, 2:29 p.m. UTC | #4
On Sat, Oct 20, 2012 at 7:05 AM, Yeongkyoon Lee
<yeongkyoon.lee@samsung.com> wrote:
> Add GETPC_EXT which is used by MMU helpers to selectively calculate the code
> address of accessing guest memory when called from a qemu_ld/st optimized code
> or a C function. Currently, it supports only i386 and x86-64 hosts.
>
> Signed-off-by: Yeongkyoon Lee <yeongkyoon.lee@samsung.com>
>
> ---
>  exec-all.h         |   36 ++++++++++++++++++++++++++++++++++++

Patch does not apply, please rebase:
Applying: tcg: Add extended GETPC mechanism for MMU helpers with ldst
optimization
error: patch failed: exec-all.h:311
error: exec-all.h: patch does not apply
Patch failed at 0002 tcg: Add extended GETPC mechanism for MMU helpers
with ldst optimization

>  exec.c             |   11 +++++++++++
>  softmmu_template.h |   16 ++++++++--------
>  3 files changed, 55 insertions(+), 8 deletions(-)
>
> diff --git a/exec-all.h b/exec-all.h
> index 6516da0..9eda604 100644
> --- a/exec-all.h
> +++ b/exec-all.h
> @@ -311,6 +311,42 @@ extern uintptr_t tci_tb_ptr;
>  # define GETPC() ((uintptr_t)__builtin_return_address(0) - 1)
>  #endif
>
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
> +/* qemu_ld/st optimization split code generation to fast and slow path, thus,
> +   it needs special handling for an MMU helper which is called from the slow
> +   path, to get the fast path's pc without any additional argument.
> +   It uses a tricky solution which embeds the fast path pc into the slow path.
> +
> +   Code flow in slow path:
> +   (1) pre-process
> +   (2) call MMU helper
> +   (3) jump to (5)
> +   (4) fast path information (implementation specific)
> +   (5) post-process (e.g. stack adjust)
> +   (6) jump to corresponding code of the next of fast path
> + */
> +# if defined(__i386__) || defined(__x86_64__)
> +/* To avoid broken disassembling, long jmp is used for embedding fast path pc,
> +   so that the destination is the next code of fast path, though this jmp is
> +   never executed.
> +
> +   call MMU helper
> +   jmp POST_PROC (2byte)    <- GETRA()
> +   jmp NEXT_CODE (5byte)
> +   POST_PROCESS ...         <- GETRA() + 7
> + */
> +#  define GETRA() ((uintptr_t)__builtin_return_address(0))
> +#  define GETPC_LDST() ((uintptr_t)(GETRA() + 7 + \
> +                                    *(int32_t *)((void *)GETRA() + 3) - 1))
> +# else
> +#  error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!"
> +# endif
> +bool is_tcg_gen_code(uintptr_t pc_ptr);
> +# define GETPC_EXT() (is_tcg_gen_code(GETRA()) ? GETPC_LDST() : GETPC())
> +#else
> +# define GETPC_EXT() GETPC()
> +#endif
> +
>  #if !defined(CONFIG_USER_ONLY)
>
>  struct MemoryRegion *iotlb_to_region(target_phys_addr_t index);
> diff --git a/exec.c b/exec.c
> index 7899042..8a825a9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1379,6 +1379,17 @@ void tb_link_page(TranslationBlock *tb,
>      mmap_unlock();
>  }
>
> +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
> +/* check whether the give addr is in TCG generated code buffer or not */
> +bool is_tcg_gen_code(uintptr_t tc_ptr)
> +{
> +    /* This can be called during code generation, code_gen_buffer_max_size
> +       is used instead of code_gen_ptr for upper boundary checking */
> +    return (tc_ptr >= (uintptr_t)code_gen_buffer &&
> +            tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
> +}
> +#endif
> +
>  /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
>     tb[1].tc_ptr. Return NULL if not found */
>  TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
> diff --git a/softmmu_template.h b/softmmu_template.h
> index e2490f0..d23de8c 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -111,13 +111,13 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
>              /* IO access */
>              if ((addr & (DATA_SIZE - 1)) != 0)
>                  goto do_unaligned_access;
> -            retaddr = GETPC();
> +            retaddr = GETPC_EXT();
>              ioaddr = env->iotlb[mmu_idx][index];
>              res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>              /* slow unaligned access (it spans two pages or IO) */
>          do_unaligned_access:
> -            retaddr = GETPC();
> +            retaddr = GETPC_EXT();
>  #ifdef ALIGNED_ONLY
>              do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
>  #endif
> @@ -128,7 +128,7 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
>              uintptr_t addend;
>  #ifdef ALIGNED_ONLY
>              if ((addr & (DATA_SIZE - 1)) != 0) {
> -                retaddr = GETPC();
> +                retaddr = GETPC_EXT();
>                  do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
>              }
>  #endif
> @@ -138,7 +138,7 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
>          }
>      } else {
>          /* the page is not in the TLB : fill it */
> -        retaddr = GETPC();
> +        retaddr = GETPC_EXT();
>  #ifdef ALIGNED_ONLY
>          if ((addr & (DATA_SIZE - 1)) != 0)
>              do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
> @@ -257,12 +257,12 @@ void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>              /* IO access */
>              if ((addr & (DATA_SIZE - 1)) != 0)
>                  goto do_unaligned_access;
> -            retaddr = GETPC();
> +            retaddr = GETPC_EXT();
>              ioaddr = env->iotlb[mmu_idx][index];
>              glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>          do_unaligned_access:
> -            retaddr = GETPC();
> +            retaddr = GETPC_EXT();
>  #ifdef ALIGNED_ONLY
>              do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
>  #endif
> @@ -273,7 +273,7 @@ void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>              uintptr_t addend;
>  #ifdef ALIGNED_ONLY
>              if ((addr & (DATA_SIZE - 1)) != 0) {
> -                retaddr = GETPC();
> +                retaddr = GETPC_EXT();
>                  do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
>              }
>  #endif
> @@ -283,7 +283,7 @@ void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>          }
>      } else {
>          /* the page is not in the TLB : fill it */
> -        retaddr = GETPC();
> +        retaddr = GETPC_EXT();
>  #ifdef ALIGNED_ONLY
>          if ((addr & (DATA_SIZE - 1)) != 0)
>              do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
> --
> 1.7.5.4
YeongKyoon Lee Oct. 29, 2012, 5:26 a.m. UTC | #5
On 2012년 10월 27일 23:29, Blue Swirl wrote:
> On Sat, Oct 20, 2012 at 7:05 AM, Yeongkyoon Lee
> <yeongkyoon.lee@samsung.com> wrote:
>> Add GETPC_EXT which is used by MMU helpers to selectively calculate the code
>> address of accessing guest memory when called from a qemu_ld/st optimized code
>> or a C function. Currently, it supports only i386 and x86-64 hosts.
>>
>> Signed-off-by: Yeongkyoon Lee <yeongkyoon.lee@samsung.com>
>>
>> ---
>>   exec-all.h         |   36 ++++++++++++++++++++++++++++++++++++
> Patch does not apply, please rebase:
> Applying: tcg: Add extended GETPC mechanism for MMU helpers with ldst
> optimization
> error: patch failed: exec-all.h:311
> error: exec-all.h: patch does not apply
> Patch failed at 0002 tcg: Add extended GETPC mechanism for MMU helpers
> with ldst optimization

Thanks.
I'll send new series with rebasing.
diff mbox

Patch

diff --git a/exec-all.h b/exec-all.h
index 6516da0..9eda604 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -311,6 +311,42 @@  extern uintptr_t tci_tb_ptr;
 # define GETPC() ((uintptr_t)__builtin_return_address(0) - 1)
 #endif

+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+/* The qemu_ld/st optimization splits code generation into a fast path and a
+   slow path, so an MMU helper called from the slow path needs special handling
+   to obtain the fast path's pc without taking an additional argument.
+   The solution is to embed the fast path's pc in the slow path itself.
+
+   Code flow in slow path:
+   (1) pre-process
+   (2) call MMU helper
+   (3) jump to (5)
+   (4) fast path information (implementation specific)
+   (5) post-process (e.g. stack adjust)
+   (6) jump to the code that follows the fast path
+ */
+# if defined(__i386__) || defined(__x86_64__)
+/* To keep disassembly intact, the fast path's pc is embedded as a long jmp
+   whose destination is the code following the fast path; this jmp is never
+   executed.
+
+   call MMU helper
+   jmp POST_PROC (2byte)    <- GETRA()
+   jmp NEXT_CODE (5byte)
+   POST_PROCESS ...         <- GETRA() + 7
+ */
+#  define GETRA() ((uintptr_t)__builtin_return_address(0))
+#  define GETPC_LDST() ((uintptr_t)(GETRA() + 7 + \
+                                    *(int32_t *)((void *)GETRA() + 3) - 1))
+# else
+#  error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!"
+# endif
+bool is_tcg_gen_code(uintptr_t pc_ptr);
+# define GETPC_EXT() (is_tcg_gen_code(GETRA()) ? GETPC_LDST() : GETPC())
+#else
+# define GETPC_EXT() GETPC()
+#endif
+
 #if !defined(CONFIG_USER_ONLY)

 struct MemoryRegion *iotlb_to_region(target_phys_addr_t index);
diff --git a/exec.c b/exec.c
index 7899042..8a825a9 100644
--- a/exec.c
+++ b/exec.c
@@ -1379,6 +1379,17 @@  void tb_link_page(TranslationBlock *tb,
     mmap_unlock();
 }

+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+/* check whether the given addr is in TCG generated code buffer or not */
+bool is_tcg_gen_code(uintptr_t tc_ptr)
+{
+    /* This can be called during code generation, code_gen_buffer_max_size
+       is used instead of code_gen_ptr for upper boundary checking */
+    return (tc_ptr >= (uintptr_t)code_gen_buffer &&
+            tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
+}
+#endif
+
 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
    tb[1].tc_ptr. Return NULL if not found */
 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
diff --git a/softmmu_template.h b/softmmu_template.h
index e2490f0..d23de8c 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -111,13 +111,13 @@  glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
             /* IO access */
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
-            retaddr = GETPC();
+            retaddr = GETPC_EXT();
             ioaddr = env->iotlb[mmu_idx][index];
             res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
-            retaddr = GETPC();
+            retaddr = GETPC_EXT();
 #ifdef ALIGNED_ONLY
             do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
@@ -128,7 +128,7 @@  glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
             uintptr_t addend;
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
-                retaddr = GETPC();
+                retaddr = GETPC_EXT();
                 do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
             }
 #endif
@@ -138,7 +138,7 @@  glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
         }
     } else {
         /* the page is not in the TLB : fill it */
-        retaddr = GETPC();
+        retaddr = GETPC_EXT();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
             do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
@@ -257,12 +257,12 @@  void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
             /* IO access */
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
-            retaddr = GETPC();
+            retaddr = GETPC_EXT();
             ioaddr = env->iotlb[mmu_idx][index];
             glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
-            retaddr = GETPC();
+            retaddr = GETPC_EXT();
 #ifdef ALIGNED_ONLY
             do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
 #endif
@@ -273,7 +273,7 @@  void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
             uintptr_t addend;
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
-                retaddr = GETPC();
+                retaddr = GETPC_EXT();
                 do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
             }
 #endif
@@ -283,7 +283,7 @@  void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
         }
     } else {
         /* the page is not in the TLB : fill it */
-        retaddr = GETPC();
+        retaddr = GETPC_EXT();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
             do_unaligned_access(env, addr, 1, mmu_idx, retaddr);