Patchwork [08/10] tcg: Constant fold add2 and sub2

login
register
mail settings
Submitter Richard Henderson
Date Oct. 2, 2012, 6:32 p.m.
Message ID <1349202750-16815-9-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/188610/
State New
Headers show

Comments

Richard Henderson - Oct. 2, 2012, 6:32 p.m.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/optimize.c | 35 +++++++++++++++++++++++++++++++++++
 tcg/tcg-op.h   |  9 +++++++++
 2 files changed, 44 insertions(+)
Aurelien Jarno - Oct. 10, 2012, 9:52 a.m.
On Tue, Oct 02, 2012 at 11:32:28AM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/optimize.c | 35 +++++++++++++++++++++++++++++++++++
>  tcg/tcg-op.h   |  9 +++++++++
>  2 files changed, 44 insertions(+)
> 
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index d9251e4..05891ef 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -796,6 +796,41 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
>              }
>              goto do_default;
>  
> +        case INDEX_op_add2_i32:
> +        case INDEX_op_sub2_i32:
> +            if (temps[args[2]].state == TCG_TEMP_CONST
> +                && temps[args[3]].state == TCG_TEMP_CONST
> +                && temps[args[4]].state == TCG_TEMP_CONST
> +                && temps[args[5]].state == TCG_TEMP_CONST) {
> +                uint32_t al = temps[args[2]].val;
> +                uint32_t ah = temps[args[3]].val;
> +                uint32_t bl = temps[args[4]].val;
> +                uint32_t bh = temps[args[5]].val;
> +                uint64_t a = ((uint64_t)ah << 32) | al;
> +                uint64_t b = ((uint64_t)bh << 32) | bl;
> +                TCGArg rl, rh;
> +
> +                if (op == INDEX_op_add2_i32) {
> +                    a += b;
> +                } else {
> +                    a -= b;
> +                }
> +
> +                /* We emit the extra nop when we emit the add2/sub2.  */
> +                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
> +
> +                rl = args[0];
> +                rh = args[1];
> +                gen_opc_buf[op_index] = INDEX_op_movi_i32;
> +                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
> +                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
> +                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
> +                gen_args += 4;
> +                args += 6;
> +                break;
> +            }
> +            goto do_default;
> +
>          case INDEX_op_brcond2_i32:
>              tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
>              if (tmp != 2) {
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index bd93fe4..1f5a021 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -25,6 +25,11 @@
>  
>  int gen_new_label(void);
>  
> +static inline void tcg_gen_op0(TCGOpcode opc)
> +{
> +    *gen_opc_ptr++ = opc;
> +}
> +
>  static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
>  {
>      *gen_opc_ptr++ = opc;
> @@ -866,6 +871,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
>      tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
>                      TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
>                      TCGV_HIGH(arg2));
> +    /* Allow the optimizer room to replace add2 with two moves.  */
> +    tcg_gen_op0(INDEX_op_nop);
>  }
>  
>  static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
> @@ -873,6 +880,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
>      tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
>                      TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
>                      TCGV_HIGH(arg2));
> +    /* Allow the optimizer room to replace sub2 with two moves.  */
> +    tcg_gen_op0(INDEX_op_nop);
>  }
>  
>  static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index d9251e4..05891ef 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -796,6 +796,41 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             goto do_default;
 
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[5]].state == TCG_TEMP_CONST) {
+                uint32_t al = temps[args[2]].val;
+                uint32_t ah = temps[args[3]].val;
+                uint32_t bl = temps[args[4]].val;
+                uint32_t bh = temps[args[5]].val;
+                uint64_t a = ((uint64_t)ah << 32) | al;
+                uint64_t b = ((uint64_t)bh << 32) | bl;
+                TCGArg rl, rh;
+
+                if (op == INDEX_op_add2_i32) {
+                    a += b;
+                } else {
+                    a -= b;
+                }
+
+                /* We emit the extra nop when we emit the add2/sub2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
         case INDEX_op_brcond2_i32:
             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
             if (tmp != 2) {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index bd93fe4..1f5a021 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,6 +25,11 @@ 
 
 int gen_new_label(void);
 
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+    *gen_opc_ptr++ = opc;
+}
+
 static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
     *gen_opc_ptr++ = opc;
@@ -866,6 +871,8 @@  static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace add2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -873,6 +880,8 @@  static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace sub2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)