Patchwork translate: optimize gen_intermediate_code_internal

login
register
mail settings
Submitter liguang
Date April 9, 2013, 3:45 a.m.
Message ID <1365479139-18737-1-git-send-email-lig.fnst@cn.fujitsu.com>
Download mbox | patch
Permalink /patch/234951/
State New
Headers show

Comments

liguang - April 9, 2013, 3:45 a.m.
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
 target-arm/translate.c  |   17 ++++++++---------
 target-i386/translate.c |   17 ++++++++---------
 target-mips/translate.c |   16 ++++++++--------
 3 files changed, 24 insertions(+), 26 deletions(-)
陳韋任 - April 9, 2013, 8:05 a.m.
Hi liguang,

  Just to be curious, how much performance improvement this patch can get?

Regards,
chenwj

On Tue, Apr 09, 2013 at 11:45:39AM +0800, liguang wrote:
> Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
> ---
>  target-arm/translate.c  |   17 ++++++++---------
>  target-i386/translate.c |   17 ++++++++---------
>  target-mips/translate.c |   16 ++++++++--------
>  3 files changed, 24 insertions(+), 26 deletions(-)
> 
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 35a21be..c0c080d 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -9806,11 +9806,10 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
>      cpu_M0 = tcg_temp_new_i64();
>      next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
>      lj = -1;
> -    num_insns = 0;
>      max_insns = tb->cflags & CF_COUNT_MASK;
> -    if (max_insns == 0)
> +    if (max_insns == 0) {
>          max_insns = CF_COUNT_MASK;
> -
> +    }
>      gen_tb_start();
>  
>      tcg_clear_temp_count();
> @@ -9889,9 +9888,9 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
>          if (search_pc) {
>              j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
>              if (lj < j) {
> -                lj++;
> -                while (lj < j)
> -                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
> +                while (++lj < j) {
> +                    tcg_ctx.gen_opc_instr_start[lj] = 0;
> +                }
>              }
>              tcg_ctx.gen_opc_pc[lj] = dc->pc;
>              gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
> @@ -10028,9 +10027,9 @@ done_generating:
>  #endif
>      if (search_pc) {
>          j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
> -        lj++;
> -        while (lj <= j)
> -            tcg_ctx.gen_opc_instr_start[lj++] = 0;
> +        while (++lj <= j) {
> +            tcg_ctx.gen_opc_instr_start[lj] = 0;
> +        }
>      } else {
>          tb->size = dc->pc - pc_start;
>          tb->icount = num_insns;
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 7596a90..9c5e1a3 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -8319,11 +8319,10 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
>      dc->is_jmp = DISAS_NEXT;
>      pc_ptr = pc_start;
>      lj = -1;
> -    num_insns = 0;
>      max_insns = tb->cflags & CF_COUNT_MASK;
> -    if (max_insns == 0)
> +    if (max_insns == 0) {
>          max_insns = CF_COUNT_MASK;
> -
> +    }
>      gen_tb_start();
>      for(;;) {
>          if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
> @@ -8338,9 +8337,9 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
>          if (search_pc) {
>              j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
>              if (lj < j) {
> -                lj++;
> -                while (lj < j)
> -                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
> +                while (++lj < j) {
> +                    tcg_ctx.gen_opc_instr_start[lj] = 0;
> +                }
>              }
>              tcg_ctx.gen_opc_pc[lj] = pc_ptr;
>              gen_opc_cc_op[lj] = dc->cc_op;
> @@ -8387,9 +8386,9 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
>      /* we don't forget to fill the last values */
>      if (search_pc) {
>          j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
> -        lj++;
> -        while (lj <= j)
> -            tcg_ctx.gen_opc_instr_start[lj++] = 0;
> +        while (++lj <= j) {
> +            tcg_ctx.gen_opc_instr_start[lj] = 0;
> +        }
>      }
>  
>  #ifdef DEBUG_DISAS
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index b7f8203..d1e5d84 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -15571,10 +15571,10 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb,
>  #else
>          ctx.mem_idx = ctx.hflags & MIPS_HFLAG_KSU;
>  #endif
> -    num_insns = 0;
>      max_insns = tb->cflags & CF_COUNT_MASK;
> -    if (max_insns == 0)
> +    if (max_insns == 0) {
>          max_insns = CF_COUNT_MASK;
> +    }
>      LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags);
>      gen_tb_start();
>      while (ctx.bstate == BS_NONE) {
> @@ -15595,9 +15595,9 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb,
>          if (search_pc) {
>              j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
>              if (lj < j) {
> -                lj++;
> -                while (lj < j)
> -                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
> +                while (++lj < j) {
> +                    tcg_ctx.gen_opc_instr_start[lj] = 0;
> +                }
>              }
>              tcg_ctx.gen_opc_pc[lj] = ctx.pc;
>              gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
> @@ -15678,9 +15678,9 @@ done_generating:
>      *tcg_ctx.gen_opc_ptr = INDEX_op_end;
>      if (search_pc) {
>          j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
> -        lj++;
> -        while (lj <= j)
> -            tcg_ctx.gen_opc_instr_start[lj++] = 0;
> +        while (++lj <= j) {
> +            tcg_ctx.gen_opc_instr_start[lj] = 0;
> +        }
>      } else {
>          tb->size = ctx.pc - pc_start;
>          tb->icount = num_insns;
> -- 
> 1.7.2.5
>
Paolo Bonzini - April 9, 2013, 8:11 a.m.
Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto:
> Hi liguang,
> 
>   Just to be curious, how much performance improvement this patch can get?

I think zero.  It is indeed making the code a tiny bit more readable in
the 2nd/3rd/5th/6th hunks.

But the 1st and 4th hunks, which do

-    num_insns = 0;

are wrong.  I'm surprised the compiler doesn't report usage of an
uninitialized variable.  liguang, how did you test them?

Paolo


> Regards,
> chenwj
liguang - April 9, 2013, 8:21 a.m.
在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道:
> Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto:
> > Hi liguang,
> > 
> >   Just to be curious, how much performance improvement this patch can get?
> 
> I think zero.  It is indeed making the code a tiny bit more readable in
> the 2nd/3rd/5th/6th hunks.

I think maybe a little greater than 0,
for it optimizes a variable 'lj' increment.

> 
> But the 1st and 4th hunks, which do
> 
> -    num_insns = 0;
> 
> are wrong.  I'm surprised the compiler doesn't report usage of an
> uninitialized variable.  liguang, how did you test them?

I do normal compile and run guest OS for TCG,
I think 'max_insns = tb->cflags & CF_COUNT_MASK;' is enough
to feed compiler.

> 
> Paolo
> 
> 
> > Regards,
> > chenwj
>
陳韋任 - April 9, 2013, 8:33 a.m.
On Tue, Apr 09, 2013 at 10:11:37AM +0200, Paolo Bonzini wrote:
> Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto:
> > Hi liguang,
> > 
> >   Just to be curious, how much performance improvement this patch can get?
> 
> I think zero.  It is indeed making the code a tiny bit more readable in
> the 2nd/3rd/5th/6th hunks.

  Then I think the subject should be changed from "optimize" to
"readable". "Optimize" is a kind of misleading...

> But the 1st and 4th hunks, which do
> 
> -    num_insns = 0;
> 
> are wrong.  I'm surprised the compiler doesn't report usage of an
> uninitialized variable.  liguang, how did you test them?
> 
> Paolo
> 
> 
> > Regards,
> > chenwj
>
Peter Maydell - April 9, 2013, 9:08 a.m.
On 9 April 2013 09:21, li guang <lig.fnst@cn.fujitsu.com> wrote:
> 在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道:
>> Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto:
>> > Hi liguang,
>> >
>> >   Just to be curious, how much performance improvement this patch can get?
>>
>> I think zero.  It is indeed making the code a tiny bit more readable in
>> the 2nd/3rd/5th/6th hunks.
>
> I think maybe a little greater than 0,
> for it optimizes a variable 'lj' increment.

(1) The compiler is easily smart enough to know that both ways
of writing the code are equivalent, and to generate whatever
code is better
(2) This is in a comparatively rare code path anyway, because
it only happens when we take an unexpected exception [eg fault
on a guest load/store]
(3) All optimisation should start with profiling, otherwise
you have no idea whether you're even looking at the right
places to improve.

-- PMM
陳韋任 - April 9, 2013, 9:20 a.m.
Hi liguang,

On Tue, Apr 09, 2013 at 04:21:10PM +0800, li guang wrote:
> 在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道:
> > Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto:
> > > Hi liguang,
> > > 
> > >   Just to be curious, how much performance improvement this patch can get?
> > 
> > I think zero.  It is indeed making the code a tiny bit more readable in
> > the 2nd/3rd/5th/6th hunks.
> 
> I think maybe a little greater than 0,
> for it optimizes a variable 'lj' increment.

  As Peter said, only profiling can show if there is a performance
improvement.

> > But the 1st and 4th hunks, which do
> > 
> > -    num_insns = 0;
> > 
> > are wrong.  I'm surprised the compiler doesn't report usage of an
> > uninitialized variable.  liguang, how did you test them?
> 
> I do normal compile and run guest OS for TCG,
> I think 'max_insns = tb->cflags & CF_COUNT_MASK;' is enough
> to feed compiler.

  num_insns and max_insns are two different variables, right? So this
assignment does not do anything with num_insns.

Regards,
chenwj
liguang - April 10, 2013, 12:28 a.m.
在 2013-04-09二的 10:08 +0100,Peter Maydell写道:
> On 9 April 2013 09:21, li guang <lig.fnst@cn.fujitsu.com> wrote:
> > 在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道:
> >> Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto:
> >> > Hi liguang,
> >> >
> >> >   Just to be curious, how much performance improvement this patch can get?
> >>
> >> I think zero.  It is indeed making the code a tiny bit more readable in
> >> the 2nd/3rd/5th/6th hunks.
> >
> > I think maybe a little greater than 0,
> > for it optimizes a variable 'lj' increment.
> 
> (1) The compiler is easily smart enough to know that both ways
> of writing the code are equivalent, and to generate whatever
> code is better
> (2) This is in a comparatively rare code path anyway, because
> it only happens when we take an unexpected exception [eg fault
> on a guest load/store]
> (3) All optimisation should start with profiling, otherwise
> you have no idea whether you're even looking at the right
> places to improve.
> 

OK, thanks!
but, please make no mistake,
I'm not saying this patch promote performance of TCG,
this just optimize code writing of this function, of
course code writing is not deemed to promote its performance.
or, we shouldn't do any code change if they benefit little
in performance?
陳韋任 - April 10, 2013, 2:31 a.m.
Hi liguang,

> OK, thanks!
> but, please make no mistake,
> I'm not saying this patch promote performance of TCG,
> this just optimize code writing of this function, of
> course code writing is not deemed to promote its performance.
> or, we shouldn't do any code change if they benefit little
> in performance?

  IMO, if you are saying the patch can improve performance, then
you should run benchmark to convince others. If your patch is trying
to make code clear, then "optimize" might not be a good term to describe
your patch.

Regards,
chenwj
liguang - April 10, 2013, 2:38 a.m.
在 2013-04-10三的 10:31 +0800,陳韋任 (Wei-Ren Chen)写道:
> Hi liguang,
> 
> > OK, thanks!
> > but, please make no mistake,
> > I'm not saying this patch promote performance of TCG,
> > this just optimize code writing of this function, of
> > course code writing is not deemed to promote its performance.
> > or, we shouldn't do any code change if they benefit little
> > in performance?
> 
>   IMO, if you are saying the patch can improve performance, 

sorry, I'm not saying that :-)

> then you should run benchmark to convince others. If your patch is trying
> to make code clear, then "optimize" might not be a good term to describe
> your patch.
> 

you're right, seems the word 'optimize' is not good here.
but what should I use?
you know, even compiler like gcc will use -O for code optimization.
陳韋任 - April 10, 2013, 2:58 a.m.
> you're right, seems the word 'optimize' is not good here.
> but what should I use?
> you know, even compiler like gcc will use -O for code optimization.

  "translate: code cleanup in gen_intermediate_code_internal" would be
better, I think. :)

Regards,
chenwj
liguang - April 10, 2013, 3:03 a.m.
在 2013-04-10三的 10:58 +0800,陳韋任 (Wei-Ren Chen)写道:
> > you're right, seems the word 'optimize' is not good here.
> > but what should I use?
> > you know, even compiler like gcc will use -O for code optimization.
> 
>   "translate: code cleanup in gen_intermediate_code_internal" would be
> better, I think. :)
> 

OK, thanks!
liguang - April 11, 2013, 2:12 a.m.
在 2013-04-09二的 17:20 +0800,陳韋任 (Wei-Ren Chen)写道:
> Hi liguang,
> 
> On Tue, Apr 09, 2013 at 04:21:10PM +0800, li guang wrote:
> > 在 2013-04-09二的 10:11 +0200,Paolo Bonzini写道:
> > > Il 09/04/2013 10:05, 陳韋任 (Wei-Ren Chen) ha scritto:
> > > > Hi liguang,
> > > > 
> > > >   Just to be curious, how much performance improvement this patch can get?
> > > 
> > > I think zero.  It is indeed making the code a tiny bit more readable in
> > > the 2nd/3rd/5th/6th hunks.
> > 
> > I think maybe a little greater than 0,
> > for it optimizes a variable 'lj' increment.
> 
>   As Peter said, only profiling can show if there is a performance
> improvement.
> 
> > > But the 1st and 4th hunks, which do
> > > 
> > > -    num_insns = 0;
> > > 
> > > are wrong.  I'm surprised the compiler doesn't report usage of an
> > > uninitialized variable.  liguang, how did you test them?
> > 
> > I do normal compile and run guest OS for TCG,
> > I think 'max_insns = tb->cflags & CF_COUNT_MASK;' is enough
> > to feed compiler.
> 
>   num_insns and max_insns are two different variables, right? So this
> assignment does not do anything with num_insns.

Yes, you're right.
Thanks!

> 
> Regards,
> chenwj
>

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 35a21be..c0c080d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -9806,11 +9806,10 @@  static inline void gen_intermediate_code_internal(CPUARMState *env,
     cpu_M0 = tcg_temp_new_i64();
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
     lj = -1;
-    num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
-
+    }
     gen_tb_start();
 
     tcg_clear_temp_count();
@@ -9889,9 +9888,9 @@  static inline void gen_intermediate_code_internal(CPUARMState *env,
         if (search_pc) {
             j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
+                while (++lj < j) {
+                    tcg_ctx.gen_opc_instr_start[lj] = 0;
+                }
             }
             tcg_ctx.gen_opc_pc[lj] = dc->pc;
             gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
@@ -10028,9 +10027,9 @@  done_generating:
 #endif
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
+        while (++lj <= j) {
+            tcg_ctx.gen_opc_instr_start[lj] = 0;
+        }
     } else {
         tb->size = dc->pc - pc_start;
         tb->icount = num_insns;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 7596a90..9c5e1a3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -8319,11 +8319,10 @@  static inline void gen_intermediate_code_internal(CPUX86State *env,
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
     lj = -1;
-    num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
-
+    }
     gen_tb_start();
     for(;;) {
         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
@@ -8338,9 +8337,9 @@  static inline void gen_intermediate_code_internal(CPUX86State *env,
         if (search_pc) {
             j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
+                while (++lj < j) {
+                    tcg_ctx.gen_opc_instr_start[lj] = 0;
+                }
             }
             tcg_ctx.gen_opc_pc[lj] = pc_ptr;
             gen_opc_cc_op[lj] = dc->cc_op;
@@ -8387,9 +8386,9 @@  static inline void gen_intermediate_code_internal(CPUX86State *env,
     /* we don't forget to fill the last values */
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
+        while (++lj <= j) {
+            tcg_ctx.gen_opc_instr_start[lj] = 0;
+        }
     }
 
 #ifdef DEBUG_DISAS
diff --git a/target-mips/translate.c b/target-mips/translate.c
index b7f8203..d1e5d84 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15571,10 +15571,10 @@  gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb,
 #else
         ctx.mem_idx = ctx.hflags & MIPS_HFLAG_KSU;
 #endif
-    num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
     LOG_DISAS("\ntb %p idx %d hflags %04x\n", tb, ctx.mem_idx, ctx.hflags);
     gen_tb_start();
     while (ctx.bstate == BS_NONE) {
@@ -15595,9 +15595,9 @@  gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb,
         if (search_pc) {
             j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
+                while (++lj < j) {
+                    tcg_ctx.gen_opc_instr_start[lj] = 0;
+                }
             }
             tcg_ctx.gen_opc_pc[lj] = ctx.pc;
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
@@ -15678,9 +15678,9 @@  done_generating:
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
+        while (++lj <= j) {
+            tcg_ctx.gen_opc_instr_start[lj] = 0;
+        }
     } else {
         tb->size = ctx.pc - pc_start;
         tb->icount = num_insns;