Message ID | alpine.LNX.2.00.1209220007340.8843@linmac |
---|---|
State | New |
Headers | show |
On 09/21/2012 01:10 PM, malc wrote:
> +        if (dest == v2) {
> +            label_ptr = s->code_ptr;
> +            tcg_out32 (s, tcg_to_bc[tcg_invert_cond (cond)]);
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> +        }
> +        else {
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> +            label_ptr = s->code_ptr;
> +            tcg_out32 (s, tcg_to_bc[cond]);
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
> +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> +        }

How about

    if (dest == v2) {
        cond = tcg_invert_cond(cond);
        v2 = v1;
    } else if (dest != v1) {
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
    }
    /* Branch forward over one insn. */
    tcg_out32 (s, tcg_to_bc[cond] | 4);
    tcg_out_mov(s, TCG_TYPE_I32, dest, v2);

which avoids an extra mov if dest == v1, and also minimizes the code.

r~
On Fri, 21 Sep 2012, Richard Henderson wrote:

> On 09/21/2012 01:10 PM, malc wrote:
> > +        if (dest == v2) {
> > +            label_ptr = s->code_ptr;
> > +            tcg_out32 (s, tcg_to_bc[tcg_invert_cond (cond)]);
> > +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> > +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> > +        }
> > +        else {
> > +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> > +            label_ptr = s->code_ptr;
> > +            tcg_out32 (s, tcg_to_bc[cond]);
> > +            tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
> > +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> > +        }
>
> How about
>
>     if (dest == v2) {
>         cond = tcg_invert_cond(cond);
>         v2 = v1;
>     } else if (dest != v1) {
>         tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
>     }
>     /* Branch forward over one insn. */
>     tcg_out32 (s, tcg_to_bc[cond] | 4);
>     tcg_out_mov(s, TCG_TYPE_I32, dest, v2);
>
> which avoids an extra mov if dest == v1, and also minimizes the code.

Yes, thanks, that's better (save for the | 4 part, which is 4 too little).
On Fri, Sep 21, 2012 at 8:10 PM, malc <av1474@comtv.ru> wrote:
> diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
> index 26c4b33..0fb6fc7 100644
> --- a/tcg/ppc/tcg-target.c
> +++ b/tcg/ppc/tcg-target.c
> @@ -390,6 +390,7 @@ static int tcg_target_const_match(tcg_target_long val,
>  #define ORC XO31(412)
>  #define EQV XO31(284)
>  #define NAND XO31(476)
> +#define ISEL XO31( 15)
>
>  #define LBZX XO31( 87)
>  #define LHZX XO31(279)
> @@ -1269,6 +1270,75 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args,
>          );
>  }
>
> +static void tcg_out_movcond (TCGContext *s, TCGCond cond,
> +                             TCGArg dest,
> +                             TCGArg c1, TCGArg c2,
> +                             TCGArg v1, TCGArg v2,
> +                             int const_c2)
> +{
> +    tcg_out_cmp (s, cond, c1, c2, const_c2, 7);
> +
> +    if (1) {
> +        /* At least here on 7747A bit twiddling hacks are outperformed
> +           by jumpy code (the testing was not scientific) */
> +        void *label_ptr;
> +
> +        if (dest == v2) {
> +            label_ptr = s->code_ptr;
> +            tcg_out32 (s, tcg_to_bc[tcg_invert_cond (cond)]);
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> +        }
> +        else {

} else {

> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
> +            label_ptr = s->code_ptr;
> +            tcg_out32 (s, tcg_to_bc[cond]);
> +            tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
> +            reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr);
> +        }
> +    }
> +    else {

} else {

> +        /* isel version, if (1) above should be replaced once a way to
> +           figure out availability of isel on the underlying hardware
> +           is found */
> +        int tab, bc;
> +
> +        switch (cond) {
> +        case TCG_COND_EQ:
> +            tab = TAB (dest, v1, v2);
> +            bc = CR_EQ;
> +            break;
> +        case TCG_COND_NE:
> +            tab = TAB (dest, v2, v1);
> +            bc = CR_EQ;
> +            break;
> +        case TCG_COND_LTU:
> +        case TCG_COND_LT:
> +            tab = TAB (dest, v1, v2);
> +            bc = CR_LT;
> +            break;
> +        case TCG_COND_GEU:
> +        case TCG_COND_GE:
> +            tab = TAB (dest, v2, v1);
> +            bc = CR_LT;
> +            break;
> +        case TCG_COND_LEU:
> +        case TCG_COND_LE:
> +            tab = TAB (dest, v2, v1);
> +            bc = CR_GT;
> +            break;
> +        case TCG_COND_GTU:
> +        case TCG_COND_GT:
> +            tab = TAB (dest, v1, v2);
> +            bc = CR_GT;
> +            break;
> +        default:
> +            tcg_abort ();
> +        }
> +        tcg_out32 (s, ISEL | tab | ((bc + 28) << 6));
> +    }
> +}
> +
>  static void tcg_out_brcond (TCGContext *s, TCGCond cond,
>                              TCGArg arg1, TCGArg arg2, int const_arg2,
>                              int label_index)
> @@ -1826,6 +1896,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>              );
>          break;
>
> +    case INDEX_op_movcond_i32:
> +        tcg_out_movcond (s, args[5], args[0],
> +                         args[1], args[2],
> +                         args[3], args[4],
> +                         const_args[2]);
> +        break;
> +
>      default:
>          tcg_dump_ops (s);
>          tcg_abort ();
> @@ -1922,6 +1999,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
>      { INDEX_op_ext16u_i32, { "r", "r" } },
>
>      { INDEX_op_deposit_i32, { "r", "0", "r" } },
> +    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
>
>      { -1 },
>  };
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index 177eea1..3259d89 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -92,7 +92,7 @@ typedef enum {
>  #define TCG_TARGET_HAS_nand_i32 1
>  #define TCG_TARGET_HAS_nor_i32 1
>  #define TCG_TARGET_HAS_deposit_i32 1
> -#define TCG_TARGET_HAS_movcond_i32 0
> +#define TCG_TARGET_HAS_movcond_i32 1
>
>  #define TCG_AREG0 TCG_REG_R27
>
>
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 26c4b33..0fb6fc7 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -390,6 +390,7 @@ static int tcg_target_const_match(tcg_target_long val, #define ORC XO31(412) #define EQV XO31(284) #define NAND XO31(476) +#define ISEL XO31( 15) #define LBZX XO31( 87) #define LHZX XO31(279) @@ -1269,6 +1270,75 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args, ); } +static void tcg_out_movcond (TCGContext *s, TCGCond cond, + TCGArg dest, + TCGArg c1, TCGArg c2, + TCGArg v1, TCGArg v2, + int const_c2) +{ + tcg_out_cmp (s, cond, c1, c2, const_c2, 7); + + if (1) { + /* At least here on 7747A bit twiddling hacks are outperformed + by jumpy code (the testing was not scientific) */ + void *label_ptr; + + if (dest == v2) { + label_ptr = s->code_ptr; + tcg_out32 (s, tcg_to_bc[tcg_invert_cond (cond)]); + tcg_out_mov (s, TCG_TYPE_I32, dest, v1); + reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr); + } + else { + tcg_out_mov (s, TCG_TYPE_I32, dest, v1); + label_ptr = s->code_ptr; + tcg_out32 (s, tcg_to_bc[cond]); + tcg_out_mov (s, TCG_TYPE_I32, dest, v2); + reloc_pc14 (label_ptr, (tcg_target_long) s->code_ptr); + } + } + else { + /* isel version, if (1) above should be replaced once a way to + figure out availability of isel on the underlying hardware + is found */ + int tab, bc; + + switch (cond) { + case TCG_COND_EQ: + tab = TAB (dest, v1, v2); + bc = CR_EQ; + break; + case TCG_COND_NE: + tab = TAB (dest, v2, v1); + bc = CR_EQ; + break; + case TCG_COND_LTU: + case TCG_COND_LT: + tab = TAB (dest, v1, v2); + bc = CR_LT; + break; + case TCG_COND_GEU: + case TCG_COND_GE: + tab = TAB (dest, v2, v1); + bc = CR_LT; + break; + case TCG_COND_LEU: + case TCG_COND_LE: + tab = TAB (dest, v2, v1); + bc = CR_GT; + break; + case TCG_COND_GTU: + case TCG_COND_GT: + tab = TAB (dest, v1, v2); + bc = CR_GT; + break; + default: + tcg_abort (); + } + tcg_out32 (s, ISEL | tab | ((bc + 28) << 6)); + } +} + static 
void tcg_out_brcond (TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int label_index) @@ -1826,6 +1896,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, ); break; + case INDEX_op_movcond_i32: + tcg_out_movcond (s, args[5], args[0], + args[1], args[2], + args[3], args[4], + const_args[2]); + break; + default: tcg_dump_ops (s); tcg_abort (); @@ -1922,6 +1999,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_ext16u_i32, { "r", "r" } }, { INDEX_op_deposit_i32, { "r", "0", "r" } }, + { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } }, { -1 }, }; diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 177eea1..3259d89 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -92,7 +92,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_AREG0 TCG_REG_R27