Patchwork [21/22] tcg-i386: Use lea for three-operand add.

login
register
mail settings
Submitter Richard Henderson
Date April 14, 2010, 8:29 p.m.
Message ID <4bba2d47c5a2b61e2e68b0546f27183b7277a5d4.1272479073.git.rth@twiddle.net>
Download mbox | patch
Permalink /patch/51203/
State New
Headers show

Comments

Richard Henderson - April 14, 2010, 8:29 p.m.
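When the destination register differs from the source registers, emit
a single LEA for the addition.  The result is shorter than the mov+add
that TCG would otherwise generate for us.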

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   23 ++++++++++++++++++++---
 1 files changed, 20 insertions(+), 3 deletions(-)
Aurelien Jarno - May 21, 2010, 9:44 a.m.
On Wed, Apr 14, 2010 at 01:29:27PM -0700, Richard Henderson wrote:
> The result is shorter than the mov+add that TCG would
> otherwise generate for us.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

> ---
>  tcg/i386/tcg-target.c |   23 ++++++++++++++++++++---
>  1 files changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 755d46d..646a7b6 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1186,6 +1186,25 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_st_i32:
>          tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
>          break;
> +    case INDEX_op_add_i32:
> +        /* For 3-operand addition, use LEA.  */
> +        if (args[0] != args[1]) {
> +            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
> +
> +            if (const_args[2]) {
> +                c3 = a2, a2 = -1;
> +            } else if (a0 == a2) {
> +                /* Watch out for dest = src + dest, since we've removed
> +                   the matching constraint on the add.  */
> +                tgen_arithr(s, ARITH_ADD, a0, a1);
> +                break;
> +            }
> +
> +            tcg_out_modrm_sib_offset(s, OPC_LEA, a0, a1, a2, 0, c3);
> +            break;
> +        }
> +        c = ARITH_ADD;
> +        goto gen_arith;
>      case INDEX_op_sub_i32:
>          c = ARITH_SUB;
>          goto gen_arith;
> @@ -1198,8 +1217,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_xor_i32:
>          c = ARITH_XOR;
>          goto gen_arith;
> -    case INDEX_op_add_i32:
> -        c = ARITH_ADD;
>      gen_arith:
>          if (const_args[2]) {
>              tgen_arithi(s, c, args[0], args[2], 0);
> @@ -1374,7 +1391,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_st16_i32, { "r", "r" } },
>      { INDEX_op_st_i32, { "r", "r" } },
>  
> -    { INDEX_op_add_i32, { "r", "0", "ri" } },
> +    { INDEX_op_add_i32, { "r", "r", "ri" } },
>      { INDEX_op_sub_i32, { "r", "0", "ri" } },
>      { INDEX_op_mul_i32, { "r", "0", "ri" } },
>      { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
> -- 
> 1.6.6.1
> 
> 
> 
>

Patch

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 755d46d..646a7b6 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1186,6 +1186,25 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_st_i32:
         tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
+    case INDEX_op_add_i32:
+        /* For 3-operand addition, use LEA.  */
+        if (args[0] != args[1]) {
+            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
+
+            if (const_args[2]) {
+                c3 = a2, a2 = -1;
+            } else if (a0 == a2) {
+                /* Watch out for dest = src + dest, since we've removed
+                   the matching constraint on the add.  */
+                tgen_arithr(s, ARITH_ADD, a0, a1);
+                break;
+            }
+
+            tcg_out_modrm_sib_offset(s, OPC_LEA, a0, a1, a2, 0, c3);
+            break;
+        }
+        c = ARITH_ADD;
+        goto gen_arith;
     case INDEX_op_sub_i32:
         c = ARITH_SUB;
         goto gen_arith;
@@ -1198,8 +1217,6 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_xor_i32:
         c = ARITH_XOR;
         goto gen_arith;
-    case INDEX_op_add_i32:
-        c = ARITH_ADD;
     gen_arith:
         if (const_args[2]) {
             tgen_arithi(s, c, args[0], args[2], 0);
@@ -1374,7 +1391,7 @@  static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_st16_i32, { "r", "r" } },
     { INDEX_op_st_i32, { "r", "r" } },
 
-    { INDEX_op_add_i32, { "r", "0", "ri" } },
+    { INDEX_op_add_i32, { "r", "r", "ri" } },
     { INDEX_op_sub_i32, { "r", "0", "ri" } },
     { INDEX_op_mul_i32, { "r", "0", "ri" } },
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },