Message ID | alpine.DEB.2.02.1209111243300.32141@stedding.saclay.inria.fr |
---|---|
State | New |
Headers | show |
On Tue, Sep 11, 2012 at 1:07 PM, Marc Glisse <marc.glisse@inria.fr> wrote: > Hello, > > here is a patch that turns {v[1],v[0]} into vec_perm_expr(v,v,{1,0}) if the > target is ok with it. > > I am attaching 2 versions of the patch. p-good is the one that passes > testing. p-bad, where I rely on fold_stmt to detect identity permutations, > ICEs towards the end of the pass while checking a bogus gimple stmt (one > that gimple_debug_stmt crashes on if I call it in gdb). From a performance > point of view, p-good makes sense, but I liked the simplicity of p-bad and I > am confused as to why it fails. Probably because you cannot simply increase num_ops ... > 2012-09-11 Marc Glisse <marc.glisse@inria.fr> > > gcc/ > * tree-ssa-forwprop.c (simplify_vector_constructor): New function. > (ssa_forward_propagate_and_combine): Call it. > > gcc/testsuite/ > * gcc.dg/tree-ssa/forwprop-22.c: New testcase. > > -- > Marc Glisse > Index: Makefile.in > =================================================================== > --- Makefile.in (revision 191173) > +++ Makefile.in (working copy) > @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR > $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \ > $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \ > $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H) > tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \ > $(GIMPLE_PRETTY_PRINT_H) langhooks.h > tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) > coretypes.h \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > - langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) > + langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ > + $(TREE_VECTORIZER_H) > tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h > \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H) > tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \ > coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > $(TREE_PRETTY_PRINT_H) > tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_double } */ > +/* { dg-require-effective-target vect_perm } */ > +/* { dg-options "-O -fdump-tree-optimized" } */ > + > +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); > +void f (vec *px, vec *y, vec *z) > +{ > + vec x = *px; > + vec t1 = { x[1], x[0] }; > + vec t2 = { x[0], x[1] }; > + *y = t1; > + *z = t2; > +} > + > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > > Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c > ___________________________________________________________________ > Added: svn:keywords > + Author Date Id Revision URL > Added: svn:eol-style > + native > > Index: tree-ssa-forwprop.c > =================================================================== > --- tree-ssa-forwprop.c (revision 191173) > +++ tree-ssa-forwprop.c (working copy) > @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3. > #include "tm_p.h" > #include "basic-block.h" > #include "gimple-pretty-print.h" > #include "tree-flow.h" > #include "tree-pass.h" > #include "langhooks.h" > #include "flags.h" > #include "gimple.h" > #include "expr.h" > #include "cfgloop.h" > +#include "tree-vectorizer.h" > > /* This pass propagates the RHS of assignment statements into use > sites of the LHS of the assignment. It's basically a specialized > form of tree combination. It is hoped all of this can disappear > when we have a generalized tree combiner. > > One class of common cases we handle is forward propagating a single use > variable into a COND_EXPR. > > bb0: > @@ -2787,20 +2788,105 @@ simplify_permutation (gimple_stmt_iterat > if (TREE_CODE (op0) == SSA_NAME) > ret = remove_prop_source_from_use (op0); > if (op0 != op1 && TREE_CODE (op1) == SSA_NAME) > ret |= remove_prop_source_from_use (op1); > return ret ? 2 : 1; > } > > return 0; > } > > +/* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. */ > + > +static bool > +simplify_vector_constructor (gimple_stmt_iterator *gsi) > +{ > + gimple stmt = gsi_stmt (*gsi); > + gimple def_stmt; > + tree op, op2, orig, type, elem_type; > + unsigned elem_size, nelts, i; > + enum tree_code code; > + constructor_elt *elt; > + unsigned char *sel; > + bool maybe_ident; > + > + gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); > + > + op = gimple_assign_rhs1 (stmt); > + type = TREE_TYPE (op); > + gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); > + > + nelts = TYPE_VECTOR_SUBPARTS (type); > + elem_type = TREE_TYPE (type); > + elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); > + > + sel = XALLOCAVEC (unsigned char, nelts); > + orig = NULL; > + maybe_ident = true; > + FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt) > + { > + tree ref, op1; > + > + if (i >= nelts) > + return false; > + > + if (TREE_CODE (elt->value) != SSA_NAME) > + return false; > + def_stmt = SSA_NAME_DEF_STMT (elt->value); > + if (!def_stmt || !is_gimple_assign (def_stmt)) > + return false; > + code = gimple_assign_rhs_code (def_stmt); > + if (code != BIT_FIELD_REF) > + return false; > + op1 = gimple_assign_rhs1 (def_stmt); > + ref = TREE_OPERAND (op1, 0); > + if (orig) > + { > + if (ref != orig) > + return false; > + } > + else > + { > + if (TREE_CODE (ref) != SSA_NAME) > + return false; > + orig = ref; > + } > + if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) > + return false; > + sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; > + if (sel[i] != i) maybe_ident = false; > + } > + if (i < nelts) > + return false; > + > + if (maybe_ident) > + { > + gimple_assign_set_rhs1 (stmt, unshare_expr (orig)); > + gimple_set_num_ops (stmt, 2); > + gimple_assign_set_rhs_code (stmt, TREE_CODE (orig)); > + update_stmt (stmt); You should either use gimple_assign_set_rhs_from_tree (like here - no need to unshare_expr SSA names) > + return true; > + } > + > + op2 = vect_gen_perm_mask (type, sel); > + if (!op2) > + return false; > + orig = unshare_expr (orig); Likewise. > + gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR); > + gimple_set_num_ops (stmt, 4); > + gimple_assign_set_rhs1 (stmt, orig); > + gimple_assign_set_rhs2 (stmt, orig); > + gimple_assign_set_rhs3 (stmt, op2); ... or gimple_assign_set_rhs_with_ops, like here. That will re-allocate the stmt if necessary. Ok with that change. Thanks, Richard. > + update_stmt (stmt); > + return true; > +} > + > /* Main entry point for the forward propagation and statement combine > optimizer. */ > > static unsigned int > ssa_forward_propagate_and_combine (void) > { > basic_block bb; > unsigned int todoflags = 0; > > cfg_changed = false; > @@ -2958,20 +3044,23 @@ ssa_forward_propagate_and_combine (void) > } > else if (code == VEC_PERM_EXPR) > { > int did_something = simplify_permutation (&gsi); > if (did_something == 2) > cfg_changed = true; > changed = did_something != 0; > } > else if (code == BIT_FIELD_REF) > changed = simplify_bitfield_ref (&gsi); > + else if (code == CONSTRUCTOR > + && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) > + changed = simplify_vector_constructor (&gsi); > break; > } > > case GIMPLE_SWITCH: > changed = simplify_gimple_switch (stmt); > break; > > case GIMPLE_COND: > { > int did_something; > > Index: Makefile.in > =================================================================== > --- Makefile.in (revision 191173) > +++ Makefile.in (working copy) > @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR > $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \ > $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \ > $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H) > tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \ > $(GIMPLE_PRETTY_PRINT_H) langhooks.h > tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) > coretypes.h \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > - langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) > + langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ > + $(TREE_VECTORIZER_H) > tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h > \ > $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H) > tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \ > coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \ > $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ > $(TREE_PRETTY_PRINT_H) > tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ > $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ > Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_double } */ > +/* { dg-require-effective-target vect_perm } */ > +/* { dg-options "-O -fdump-tree-optimized" } */ > + > +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); > +void f (vec *px, vec *y, vec *z) > +{ > + vec x = *px; > + vec t1 = { x[1], x[0] }; > + vec t2 = { x[0], x[1] }; > + *y = t1; > + *z = t2; > +} > + > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ > +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > > Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c > ___________________________________________________________________ > Added: svn:eol-style > + native > Added: svn:keywords > + Author Date Id Revision URL > > Index: tree-ssa-forwprop.c > =================================================================== > --- tree-ssa-forwprop.c (revision 191173) > +++ tree-ssa-forwprop.c (working copy) > @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3. > #include "tm_p.h" > #include "basic-block.h" > #include "gimple-pretty-print.h" > #include "tree-flow.h" > #include "tree-pass.h" > #include "langhooks.h" > #include "flags.h" > #include "gimple.h" > #include "expr.h" > #include "cfgloop.h" > +#include "tree-vectorizer.h" > > /* This pass propagates the RHS of assignment statements into use > sites of the LHS of the assignment. It's basically a specialized > form of tree combination. It is hoped all of this can disappear > when we have a generalized tree combiner. > > One class of common cases we handle is forward propagating a single use > variable into a COND_EXPR. > > bb0: > @@ -2787,20 +2788,94 @@ simplify_permutation (gimple_stmt_iterat > if (TREE_CODE (op0) == SSA_NAME) > ret = remove_prop_source_from_use (op0); > if (op0 != op1 && TREE_CODE (op1) == SSA_NAME) > ret |= remove_prop_source_from_use (op1); > return ret ? 2 : 1; > } > > return 0; > } > > +/* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. */ > + > +static bool > +simplify_vector_constructor (gimple_stmt_iterator *gsi) > +{ > + gimple stmt = gsi_stmt (*gsi); > + gimple def_stmt; > + tree op, op2, orig, type, elem_type; > + unsigned elem_size, nelts, i; > + enum tree_code code; > + constructor_elt *elt; > + unsigned char *sel; > + > + gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); > + > + op = gimple_assign_rhs1 (stmt); > + type = TREE_TYPE (op); > + gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); > + > + nelts = TYPE_VECTOR_SUBPARTS (type); > + elem_type = TREE_TYPE (type); > + elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); > + > + sel = XALLOCAVEC (unsigned char, nelts); > + orig = NULL; > + FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt) > + { > + tree ref, op1; > + > + if (i >= nelts) > + return false; > + > + if (TREE_CODE (elt->value) != SSA_NAME) > + return false; > + def_stmt = SSA_NAME_DEF_STMT (elt->value); > + if (!def_stmt || !is_gimple_assign (def_stmt)) > + return false; > + code = gimple_assign_rhs_code (def_stmt); > + if (code != BIT_FIELD_REF) > + return false; > + op1 = gimple_assign_rhs1 (def_stmt); > + ref = TREE_OPERAND (op1, 0); > + if (orig) > + { > + if (ref != orig) > + return false; > + } > + else > + { > + if (TREE_CODE (ref) != SSA_NAME) > + return false; > + orig = ref; > + } > + if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) > + return false; > + sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; > + } > + if (i < nelts) > + return false; > + > + op2 = vect_gen_perm_mask (type, sel); > + if (!op2) > + return false; > + orig = unshare_expr (orig); > + gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR); > + gimple_set_num_ops (stmt, 4); > + gimple_assign_set_rhs1 (stmt, orig); > + gimple_assign_set_rhs2 (stmt, orig); > + gimple_assign_set_rhs3 (stmt, op2); > + fold_stmt (gsi); > + update_stmt (gsi_stmt (*gsi)); > + return true; > +} > + > /* Main entry point for the forward propagation and statement combine > optimizer. */ > > static unsigned int > ssa_forward_propagate_and_combine (void) > { > basic_block bb; > unsigned int todoflags = 0; > > cfg_changed = false; > @@ -2958,20 +3033,23 @@ ssa_forward_propagate_and_combine (void) > } > else if (code == VEC_PERM_EXPR) > { > int did_something = simplify_permutation (&gsi); > if (did_something == 2) > cfg_changed = true; > changed = did_something != 0; > } > else if (code == BIT_FIELD_REF) > changed = simplify_bitfield_ref (&gsi); > + else if (code == CONSTRUCTOR > + && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) > + changed = simplify_vector_constructor (&gsi); > break; > } > > case GIMPLE_SWITCH: > changed = simplify_gimple_switch (stmt); > break; > > case GIMPLE_COND: > { > int did_something; >
On Tue, 11 Sep 2012, Richard Guenther wrote: > On Tue, Sep 11, 2012 at 1:07 PM, Marc Glisse <marc.glisse@inria.fr> wrote: >> Hello, >> >> here is a patch that turns {v[1],v[0]} into vec_perm_expr(v,v,{1,0}) if the >> target is ok with it. >> >> I am attaching 2 versions of the patch. p-good is the one that passes >> testing. p-bad, where I rely on fold_stmt to detect identity permutations, >> ICEs towards the end of the pass while checking a bogus gimple stmt (one >> that gimple_debug_stmt crashes on if I call it in gdb). From a performance >> point of view, p-good makes sense, but I liked the simplicity of p-bad and I >> am confused as to why it fails. > > Probably because you cannot simply increase num_ops ... Ah... thanks, it makes sense now... For some reason I thought it was a fixed size structure and num_ops just told it how many of the fields were in use. [...] > Ok with that change. Just to be sure, that means you prefer the version where I manually detect identity and don't call fold, right? Thank you for all the quick reviews,
Index: Makefile.in =================================================================== --- Makefile.in (revision 191173) +++ Makefile.in (working copy) @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \ $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \ $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H) tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \ $(GIMPLE_PRETTY_PRINT_H) langhooks.h tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ - langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) + langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \ + $(TREE_VECTORIZER_H) tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H) tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \ $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \ $(TREE_PRETTY_PRINT_H) tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c =================================================================== --- testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c (revision 0) @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-require-effective-target vect_perm } */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +typedef double vec __attribute__((vector_size (2 * sizeof (double)))); +void f (vec *px, vec *y, vec *z) +{ + vec x = *px; + vec t1 = { x[1], x[0] }; + vec t2 = { x[0], x[1] }; + *y = t1; + *z = t2; +} + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision URL Added: svn:eol-style + native Index: tree-ssa-forwprop.c =================================================================== --- tree-ssa-forwprop.c (revision 191173) +++ tree-ssa-forwprop.c (working copy) @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3. #include "tm_p.h" #include "basic-block.h" #include "gimple-pretty-print.h" #include "tree-flow.h" #include "tree-pass.h" #include "langhooks.h" #include "flags.h" #include "gimple.h" #include "expr.h" #include "cfgloop.h" +#include "tree-vectorizer.h" /* This pass propagates the RHS of assignment statements into use sites of the LHS of the assignment. It's basically a specialized form of tree combination. It is hoped all of this can disappear when we have a generalized tree combiner. One class of common cases we handle is forward propagating a single use variable into a COND_EXPR. bb0: @@ -2787,20 +2788,105 @@ simplify_permutation (gimple_stmt_iterat if (TREE_CODE (op0) == SSA_NAME) ret = remove_prop_source_from_use (op0); if (op0 != op1 && TREE_CODE (op1) == SSA_NAME) ret |= remove_prop_source_from_use (op1); return ret ? 2 : 1; } return 0; } +/* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. */ + +static bool +simplify_vector_constructor (gimple_stmt_iterator *gsi) +{ + gimple stmt = gsi_stmt (*gsi); + gimple def_stmt; + tree op, op2, orig, type, elem_type; + unsigned elem_size, nelts, i; + enum tree_code code; + constructor_elt *elt; + unsigned char *sel; + bool maybe_ident; + + gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); + + op = gimple_assign_rhs1 (stmt); + type = TREE_TYPE (op); + gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); + + nelts = TYPE_VECTOR_SUBPARTS (type); + elem_type = TREE_TYPE (type); + elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); + + sel = XALLOCAVEC (unsigned char, nelts); + orig = NULL; + maybe_ident = true; + FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt) + { + tree ref, op1; + + if (i >= nelts) + return false; + + if (TREE_CODE (elt->value) != SSA_NAME) + return false; + def_stmt = SSA_NAME_DEF_STMT (elt->value); + if (!def_stmt || !is_gimple_assign (def_stmt)) + return false; + code = gimple_assign_rhs_code (def_stmt); + if (code != BIT_FIELD_REF) + return false; + op1 = gimple_assign_rhs1 (def_stmt); + ref = TREE_OPERAND (op1, 0); + if (orig) + { + if (ref != orig) + return false; + } + else + { + if (TREE_CODE (ref) != SSA_NAME) + return false; + orig = ref; + } + if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) + return false; + sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; + if (sel[i] != i) maybe_ident = false; + } + if (i < nelts) + return false; + + if (maybe_ident) + { + gimple_assign_set_rhs1 (stmt, unshare_expr (orig)); + gimple_set_num_ops (stmt, 2); + gimple_assign_set_rhs_code (stmt, TREE_CODE (orig)); + update_stmt (stmt); + return true; + } + + op2 = vect_gen_perm_mask (type, sel); + if (!op2) + return false; + orig = unshare_expr (orig); + gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR); + gimple_set_num_ops (stmt, 4); + gimple_assign_set_rhs1 (stmt, orig); + gimple_assign_set_rhs2 (stmt, orig); + gimple_assign_set_rhs3 (stmt, op2); + update_stmt (stmt); + return true; +} + /* Main entry point for the forward propagation and statement combine optimizer. */ static unsigned int ssa_forward_propagate_and_combine (void) { basic_block bb; unsigned int todoflags = 0; cfg_changed = false; @@ -2958,20 +3044,23 @@ ssa_forward_propagate_and_combine (void) } else if (code == VEC_PERM_EXPR) { int did_something = simplify_permutation (&gsi); if (did_something == 2) cfg_changed = true; changed = did_something != 0; } else if (code == BIT_FIELD_REF) changed = simplify_bitfield_ref (&gsi); + else if (code == CONSTRUCTOR + && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) + changed = simplify_vector_constructor (&gsi); break; } case GIMPLE_SWITCH: changed = simplify_gimple_switch (stmt); break; case GIMPLE_COND: { int did_something;