diff mbox

[v2,0/7] tcg: movcond (ppc32 version)

Message ID alpine.LNX.2.00.1209220007340.8843@linmac
State New
Headers show

Commit Message

malc Sept. 21, 2012, 8:10 p.m. UTC

Comments

Richard Henderson Sept. 21, 2012, 10:21 p.m. UTC | #1
On 09/21/2012 01:10 PM, malc wrote:
> +        if (dest == v2) {
> +            label_ptr = s->code_ptr;
> +            tcg_out32 (s, tcg_to_bc[tcg_invert_cond (cond)]);
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> +        }
> +        else {
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> +            label_ptr = s->code_ptr;
> +            tcg_out32 (s, tcg_to_bc[cond]);
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
> +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> +        }

How about

    if (dest == v2) {
        cond = tcg_invert_cond(cond);
        v2 = v1;
    } else if (dest != v1) {
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
    }
    /* Branch forward over one insn.  */
    tcg_out32 (s, tcg_to_bc[cond] | 4);
    tcg_out_mov(s, TCG_TYPE_I32, dest, v2);

which avoids an extra mov if dest == v1, and also minimizes the code.


r~
malc Sept. 21, 2012, 10:34 p.m. UTC | #2
On Fri, 21 Sep 2012, Richard Henderson wrote:

> On 09/21/2012 01:10 PM, malc wrote:
> > +        if (dest == v2) {
> > +            label_ptr = s->code_ptr;
> > +            tcg_out32 (s, tcg_to_bc[tcg_invert_cond (cond)]);
> > +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> > +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> > +        }
> > +        else {
> > +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> > +            label_ptr = s->code_ptr;
> > +            tcg_out32 (s, tcg_to_bc[cond]);
> > +            tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
> > +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> > +        }
> 
> How about
> 
>     if (dest == v2) {
>         cond = tcg_invert_cond(cond);
>         v2 = v1;
>     } else if (dest != v1) {
>         tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
>     }
>     /* Branch forward over one insn.  */
>     tcg_out32 (s, tcg_to_bc[cond] | 4);
>     tcg_out_mov(s, TCG_TYPE_I32, dest, v2);
> 
> which avoids an extra mov if dest == v1, and also minimizes the code.

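Yes, thanks, that's better (save for the "| 4" part, which is 4 too little: the bc displacement is relative to the branch instruction itself, so skipping over one insn needs "| 8")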
Blue Swirl Sept. 22, 2012, 2:38 p.m. UTC | #3
On Fri, Sep 21, 2012 at 8:10 PM, malc <av1474@comtv.ru> wrote:
> diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
> index 26c4b33..0fb6fc7 100644
> --- a/tcg/ppc/tcg-target.c
> +++ b/tcg/ppc/tcg-target.c
> @@ -390,6 +390,7 @@ static int tcg_target_const_match(tcg_target_long val,
>  #define ORC    XO31(412)
>  #define EQV    XO31(284)
>  #define NAND   XO31(476)
> +#define ISEL   XO31( 15)
>
>  #define LBZX   XO31( 87)
>  #define LHZX   XO31(279)
> @@ -1269,6 +1270,75 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args,
>          );
>  }
>
> +static void tcg_out_movcond (TCGContext *s, TCGCond cond,
> +                             TCGArg dest,
> +                             TCGArg c1, TCGArg c2,
> +                             TCGArg v1, TCGArg v2,
> +                             int const_c2)
> +{
> +    tcg_out_cmp (s, cond, c1, c2, const_c2, 7);
> +
> +    if (1) {
> +        /* At least here on 7747A bit twiddling hacks are outperformed
> +           by jumpy code (the testing was not scientific) */
> +        void *label_ptr;
> +
> +        if (dest == v2) {
> +            label_ptr = s->code_ptr;
> +            tcg_out32 (s, tcg_to_bc[tcg_invert_cond (cond)]);
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> +        }
> +        else {

Coding style: please put the closing brace and the else on one line, i.e. "} else {".

> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> +            label_ptr = s->code_ptr;
> +            tcg_out32 (s, tcg_to_bc[cond]);
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
> +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> +        }
> +    }
> +    else {

Same here: coding style wants "} else {" on a single line.

> +        /* isel version, if (1) above should be replaced once a way to
> +           figure out availability of isel on the underlying hardware
> +           is found */
> +        int tab, bc;
> +
> +        switch (cond) {
> +        case TCG_COND_EQ:
> +            tab = TAB (dest, v1, v2);
> +            bc = CR_EQ;
> +            break;
> +        case TCG_COND_NE:
> +            tab = TAB (dest, v2, v1);
> +            bc = CR_EQ;
> +            break;
> +        case TCG_COND_LTU:
> +        case TCG_COND_LT:
> +            tab = TAB (dest, v1, v2);
> +            bc = CR_LT;
> +            break;
> +        case TCG_COND_GEU:
> +        case TCG_COND_GE:
> +            tab = TAB (dest, v2, v1);
> +            bc = CR_LT;
> +            break;
> +        case TCG_COND_LEU:
> +        case TCG_COND_LE:
> +            tab = TAB (dest, v2, v1);
> +            bc = CR_GT;
> +            break;
> +        case TCG_COND_GTU:
> +        case TCG_COND_GT:
> +            tab = TAB (dest, v1, v2);
> +            bc = CR_GT;
> +            break;
> +        default:
> +            tcg_abort ();
> +        }
> +        tcg_out32 (s, ISEL | tab | ((bc + 28) << 6));
> +    }
> +}
> +
>  static void tcg_out_brcond (TCGContext *s, TCGCond cond,
>                              TCGArg arg1, TCGArg arg2, int const_arg2,
>                              int label_index)
> @@ -1826,6 +1896,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>              );
>          break;
>
> +    case INDEX_op_movcond_i32:
> +        tcg_out_movcond (s, args[5], args[0],
> +                         args[1], args[2],
> +                         args[3], args[4],
> +                         const_args[2]);
> +        break;
> +
>      default:
>          tcg_dump_ops (s);
>          tcg_abort ();
> @@ -1922,6 +1999,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
>      { INDEX_op_ext16u_i32, { "r", "r" } },
>
>      { INDEX_op_deposit_i32, { "r", "0", "r" } },
> +    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
>
>      { -1 },
>  };
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index 177eea1..3259d89 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -92,7 +92,7 @@ typedef enum {
>  #define TCG_TARGET_HAS_nand_i32         1
>  #define TCG_TARGET_HAS_nor_i32          1
>  #define TCG_TARGET_HAS_deposit_i32      1
> -#define TCG_TARGET_HAS_movcond_i32      0
> +#define TCG_TARGET_HAS_movcond_i32      1
>
>  #define TCG_AREG0 TCG_REG_R27
>
>
diff mbox

Patch

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 26c4b33..0fb6fc7 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -390,6 +390,7 @@  static int tcg_target_const_match(tcg_target_long val,
 #define ORC    XO31(412)
 #define EQV    XO31(284)
 #define NAND   XO31(476)
+#define ISEL   XO31( 15)
 
 #define LBZX   XO31( 87)
 #define LHZX   XO31(279)
@@ -1269,6 +1270,75 @@  static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args,
         );
 }
 
+static void tcg_out_movcond (TCGContext *s, TCGCond cond,
+                             TCGArg dest,
+                             TCGArg c1, TCGArg c2,
+                             TCGArg v1, TCGArg v2,
+                             int const_c2)
+{
+    tcg_out_cmp (s, cond, c1, c2, const_c2, 7);
+
+    if (1) {
+        /* At least here on 7747A bit twiddling hacks are outperformed
+           by jumpy code (the testing was not scientific) */
+        void *label_ptr;
+
+        if (dest == v2) {
+            label_ptr = s->code_ptr;
+            tcg_out32 (s, tcg_to_bc[tcg_invert_cond (cond)]);
+            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
+            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
+        }
+        else {
+            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
+            label_ptr = s->code_ptr;
+            tcg_out32 (s, tcg_to_bc[cond]);
+            tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
+            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
+        }
+    }
+    else {
+        /* isel version, if (1) above should be replaced once a way to
+           figure out availability of isel on the underlying hardware
+           is found */
+        int tab, bc;
+
+        switch (cond) {
+        case TCG_COND_EQ:
+            tab = TAB (dest, v1, v2);
+            bc = CR_EQ;
+            break;
+        case TCG_COND_NE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_EQ;
+            break;
+        case TCG_COND_LTU:
+        case TCG_COND_LT:
+            tab = TAB (dest, v1, v2);
+            bc = CR_LT;
+            break;
+        case TCG_COND_GEU:
+        case TCG_COND_GE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_LT;
+            break;
+        case TCG_COND_LEU:
+        case TCG_COND_LE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_GT;
+            break;
+        case TCG_COND_GTU:
+        case TCG_COND_GT:
+            tab = TAB (dest, v1, v2);
+            bc = CR_GT;
+            break;
+        default:
+            tcg_abort ();
+        }
+        tcg_out32 (s, ISEL | tab | ((bc + 28) << 6));
+    }
+}
+
 static void tcg_out_brcond (TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index)
@@ -1826,6 +1896,13 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
             );
         break;
 
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond (s, args[5], args[0],
+                         args[1], args[2],
+                         args[3], args[4],
+                         const_args[2]);
+        break;
+
     default:
         tcg_dump_ops (s);
         tcg_abort ();
@@ -1922,6 +1999,7 @@  static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_ext16u_i32, { "r", "r" } },
 
     { INDEX_op_deposit_i32, { "r", "0", "r" } },
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
 
     { -1 },
 };
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 177eea1..3259d89 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -92,7 +92,7 @@  typedef enum {
 #define TCG_TARGET_HAS_nand_i32         1
 #define TCG_TARGET_HAS_nor_i32          1
 #define TCG_TARGET_HAS_deposit_i32      1
-#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i32      1
 
 #define TCG_AREG0 TCG_REG_R27