diff mbox

Recognize vec_perm_expr in a constructor of bit_field_ref

Message ID alpine.DEB.2.02.1209111243300.32141@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse Sept. 11, 2012, 11:07 a.m. UTC
Hello,

here is a patch that turns {v[1],v[0]} into vec_perm_expr(v,v,{1,0}) if 
the target is ok with it.

I am attaching 2 versions of the patch. p-good is the one that passes 
testing. p-bad, where I rely on fold_stmt to detect identity permutations, 
ICEs towards the end of the pass while checking a bogus gimple stmt (one 
that gimple_debug_stmt crashes on if I call it in gdb). From a performance 
point of view, p-good makes sense, but I liked the simplicity of p-bad and 
I am confused as to why it fails.

2012-09-11  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* tree-ssa-forwprop.c (simplify_vector_constructor): New function.
 	(ssa_forward_propagate_and_combine): Call it.

gcc/testsuite/
 	* gcc.dg/tree-ssa/forwprop-22.c: New testcase.

Comments

Richard Biener Sept. 11, 2012, 2:16 p.m. UTC | #1
On Tue, Sep 11, 2012 at 1:07 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> here is a patch that turns {v[1],v[0]} into vec_perm_expr(v,v,{1,0}) if the
> target is ok with it.
>
> I am attaching 2 versions of the patch. p-good is the one that passes
> testing. p-bad, where I rely on fold_stmt to detect identity permutations,
> ICEs towards the end of the pass while checking a bogus gimple stmt (one
> that gimple_debug_stmt crashes on if I call it in gdb). From a performance
> point of view, p-good makes sense, but I liked the simplicity of p-bad and I
> am confused as to why it fails.

Probably because you cannot simply increase num_ops ...

> 2012-09-11  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * tree-ssa-forwprop.c (simplify_vector_constructor): New function.
>         (ssa_forward_propagate_and_combine): Call it.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/forwprop-22.c: New testcase.
>
> --
> Marc Glisse
> Index: Makefile.in
> ===================================================================
> --- Makefile.in (revision 191173)
> +++ Makefile.in (working copy)
> @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR
>     $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \
>     $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \
>     $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H)
>  tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
>     $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \
>     $(GIMPLE_PRETTY_PRINT_H) langhooks.h
>  tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H)
> coretypes.h \
>     $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
> -   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H)
> +   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
> +   $(TREE_VECTORIZER_H)
>  tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h
> \
>     $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
>     langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H)
>  tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \
>     coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
>     $(TREE_PRETTY_PRINT_H)
>  tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
>     $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
> Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/forwprop-22.c     (revision 0)
> +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c     (revision 0)
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_double } */
> +/* { dg-require-effective-target vect_perm } */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +
> +typedef double vec __attribute__((vector_size (2 * sizeof (double))));
> +void f (vec *px, vec *y, vec *z)
> +{
> +  vec x = *px;
> +  vec t1 = { x[1], x[0] };
> +  vec t2 = { x[0], x[1] };
> +  *y = t1;
> +  *z = t2;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: tree-ssa-forwprop.c
> ===================================================================
> --- tree-ssa-forwprop.c (revision 191173)
> +++ tree-ssa-forwprop.c (working copy)
> @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3.
>  #include "tm_p.h"
>  #include "basic-block.h"
>  #include "gimple-pretty-print.h"
>  #include "tree-flow.h"
>  #include "tree-pass.h"
>  #include "langhooks.h"
>  #include "flags.h"
>  #include "gimple.h"
>  #include "expr.h"
>  #include "cfgloop.h"
> +#include "tree-vectorizer.h"
>
>  /* This pass propagates the RHS of assignment statements into use
>     sites of the LHS of the assignment.  It's basically a specialized
>     form of tree combination.   It is hoped all of this can disappear
>     when we have a generalized tree combiner.
>
>     One class of common cases we handle is forward propagating a single use
>     variable into a COND_EXPR.
>
>       bb0:
> @@ -2787,20 +2788,105 @@ simplify_permutation (gimple_stmt_iterat
>        if (TREE_CODE (op0) == SSA_NAME)
>         ret = remove_prop_source_from_use (op0);
>        if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
>         ret |= remove_prop_source_from_use (op1);
>        return ret ? 2 : 1;
>      }
>
>    return 0;
>  }
>
> +/* Recognize a VEC_PERM_EXPR.  Returns true if there were any changes.  */
> +
> +static bool
> +simplify_vector_constructor (gimple_stmt_iterator *gsi)
> +{
> +  gimple stmt = gsi_stmt (*gsi);
> +  gimple def_stmt;
> +  tree op, op2, orig, type, elem_type;
> +  unsigned elem_size, nelts, i;
> +  enum tree_code code;
> +  constructor_elt *elt;
> +  unsigned char *sel;
> +  bool maybe_ident;
> +
> +  gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
> +
> +  op = gimple_assign_rhs1 (stmt);
> +  type = TREE_TYPE (op);
> +  gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE);
> +
> +  nelts = TYPE_VECTOR_SUBPARTS (type);
> +  elem_type = TREE_TYPE (type);
> +  elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> +
> +  sel = XALLOCAVEC (unsigned char, nelts);
> +  orig = NULL;
> +  maybe_ident = true;
> +  FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt)
> +    {
> +      tree ref, op1;
> +
> +      if (i >= nelts)
> +       return false;
> +
> +      if (TREE_CODE (elt->value) != SSA_NAME)
> +       return false;
> +      def_stmt = SSA_NAME_DEF_STMT (elt->value);
> +      if (!def_stmt || !is_gimple_assign (def_stmt))
> +       return false;
> +      code = gimple_assign_rhs_code (def_stmt);
> +      if (code != BIT_FIELD_REF)
> +       return false;
> +      op1 = gimple_assign_rhs1 (def_stmt);
> +      ref = TREE_OPERAND (op1, 0);
> +      if (orig)
> +       {
> +         if (ref != orig)
> +           return false;
> +       }
> +      else
> +       {
> +         if (TREE_CODE (ref) != SSA_NAME)
> +           return false;
> +         orig = ref;
> +       }
> +      if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size)
> +       return false;
> +      sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
> +      if (sel[i] != i) maybe_ident = false;
> +    }
> +  if (i < nelts)
> +    return false;
> +
> +  if (maybe_ident)
> +    {
> +      gimple_assign_set_rhs1 (stmt, unshare_expr (orig));
> +      gimple_set_num_ops (stmt, 2);
> +      gimple_assign_set_rhs_code (stmt, TREE_CODE (orig));
> +      update_stmt (stmt);

You should either use gimple_assign_set_rhs_from_tree (like here - no
need to unshare_expr SSA names)

> +      return true;
> +    }
> +
> +  op2 = vect_gen_perm_mask (type, sel);
> +  if (!op2)
> +    return false;
> +  orig = unshare_expr (orig);

Likewise.

> +  gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR);
> +  gimple_set_num_ops (stmt, 4);
> +  gimple_assign_set_rhs1 (stmt, orig);
> +  gimple_assign_set_rhs2 (stmt, orig);
> +  gimple_assign_set_rhs3 (stmt, op2);

... or gimple_assign_set_rhs_with_ops, like here.  That will re-allocate the
stmt if necessary.

Ok with that change.

Thanks,
Richard.

> +  update_stmt (stmt);
> +  return true;
> +}
> +
>  /* Main entry point for the forward propagation and statement combine
>     optimizer.  */
>
>  static unsigned int
>  ssa_forward_propagate_and_combine (void)
>  {
>    basic_block bb;
>    unsigned int todoflags = 0;
>
>    cfg_changed = false;
> @@ -2958,20 +3044,23 @@ ssa_forward_propagate_and_combine (void)
>                   }
>                 else if (code == VEC_PERM_EXPR)
>                   {
>                     int did_something = simplify_permutation (&gsi);
>                     if (did_something == 2)
>                       cfg_changed = true;
>                     changed = did_something != 0;
>                   }
>                 else if (code == BIT_FIELD_REF)
>                   changed = simplify_bitfield_ref (&gsi);
> +                else if (code == CONSTRUCTOR
> +                         && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE)
> +                  changed = simplify_vector_constructor (&gsi);
>                 break;
>               }
>
>             case GIMPLE_SWITCH:
>               changed = simplify_gimple_switch (stmt);
>               break;
>
>             case GIMPLE_COND:
>               {
>                 int did_something;
>
> Index: Makefile.in
> ===================================================================
> --- Makefile.in (revision 191173)
> +++ Makefile.in (working copy)
> @@ -2237,21 +2237,22 @@ tree-outof-ssa.o : tree-outof-ssa.c $(TR
>     $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \
>     $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \
>     $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H)
>  tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
>     $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \
>     $(GIMPLE_PRETTY_PRINT_H) langhooks.h
>  tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H)
> coretypes.h \
>     $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
> -   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H)
> +   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
> +   $(TREE_VECTORIZER_H)
>  tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h
> \
>     $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
>     langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H)
>  tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \
>     coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
>     $(TREE_PRETTY_PRINT_H)
>  tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
>     $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
> Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/forwprop-22.c     (revision 0)
> +++ testsuite/gcc.dg/tree-ssa/forwprop-22.c     (revision 0)
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_double } */
> +/* { dg-require-effective-target vect_perm } */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +
> +typedef double vec __attribute__((vector_size (2 * sizeof (double))));
> +void f (vec *px, vec *y, vec *z)
> +{
> +  vec x = *px;
> +  vec t1 = { x[1], x[0] };
> +  vec t2 = { x[0], x[1] };
> +  *y = t1;
> +  *z = t2;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c
> ___________________________________________________________________
> Added: svn:eol-style
>    + native
> Added: svn:keywords
>    + Author Date Id Revision URL
>
> Index: tree-ssa-forwprop.c
> ===================================================================
> --- tree-ssa-forwprop.c (revision 191173)
> +++ tree-ssa-forwprop.c (working copy)
> @@ -26,20 +26,21 @@ along with GCC; see the file COPYING3.
>  #include "tm_p.h"
>  #include "basic-block.h"
>  #include "gimple-pretty-print.h"
>  #include "tree-flow.h"
>  #include "tree-pass.h"
>  #include "langhooks.h"
>  #include "flags.h"
>  #include "gimple.h"
>  #include "expr.h"
>  #include "cfgloop.h"
> +#include "tree-vectorizer.h"
>
>  /* This pass propagates the RHS of assignment statements into use
>     sites of the LHS of the assignment.  It's basically a specialized
>     form of tree combination.   It is hoped all of this can disappear
>     when we have a generalized tree combiner.
>
>     One class of common cases we handle is forward propagating a single use
>     variable into a COND_EXPR.
>
>       bb0:
> @@ -2787,20 +2788,94 @@ simplify_permutation (gimple_stmt_iterat
>        if (TREE_CODE (op0) == SSA_NAME)
>         ret = remove_prop_source_from_use (op0);
>        if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
>         ret |= remove_prop_source_from_use (op1);
>        return ret ? 2 : 1;
>      }
>
>    return 0;
>  }
>
> +/* Recognize a VEC_PERM_EXPR.  Returns true if there were any changes.  */
> +
> +static bool
> +simplify_vector_constructor (gimple_stmt_iterator *gsi)
> +{
> +  gimple stmt = gsi_stmt (*gsi);
> +  gimple def_stmt;
> +  tree op, op2, orig, type, elem_type;
> +  unsigned elem_size, nelts, i;
> +  enum tree_code code;
> +  constructor_elt *elt;
> +  unsigned char *sel;
> +
> +  gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
> +
> +  op = gimple_assign_rhs1 (stmt);
> +  type = TREE_TYPE (op);
> +  gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE);
> +
> +  nelts = TYPE_VECTOR_SUBPARTS (type);
> +  elem_type = TREE_TYPE (type);
> +  elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> +
> +  sel = XALLOCAVEC (unsigned char, nelts);
> +  orig = NULL;
> +  FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt)
> +    {
> +      tree ref, op1;
> +
> +      if (i >= nelts)
> +       return false;
> +
> +      if (TREE_CODE (elt->value) != SSA_NAME)
> +       return false;
> +      def_stmt = SSA_NAME_DEF_STMT (elt->value);
> +      if (!def_stmt || !is_gimple_assign (def_stmt))
> +       return false;
> +      code = gimple_assign_rhs_code (def_stmt);
> +      if (code != BIT_FIELD_REF)
> +       return false;
> +      op1 = gimple_assign_rhs1 (def_stmt);
> +      ref = TREE_OPERAND (op1, 0);
> +      if (orig)
> +       {
> +         if (ref != orig)
> +           return false;
> +       }
> +      else
> +       {
> +         if (TREE_CODE (ref) != SSA_NAME)
> +           return false;
> +         orig = ref;
> +       }
> +      if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size)
> +       return false;
> +      sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
> +    }
> +  if (i < nelts)
> +    return false;
> +
> +  op2 = vect_gen_perm_mask (type, sel);
> +  if (!op2)
> +    return false;
> +  orig = unshare_expr (orig);
> +  gimple_assign_set_rhs_code (stmt, VEC_PERM_EXPR);
> +  gimple_set_num_ops (stmt, 4);
> +  gimple_assign_set_rhs1 (stmt, orig);
> +  gimple_assign_set_rhs2 (stmt, orig);
> +  gimple_assign_set_rhs3 (stmt, op2);
> +  fold_stmt (gsi);
> +  update_stmt (gsi_stmt (*gsi));
> +  return true;
> +}
> +
>  /* Main entry point for the forward propagation and statement combine
>     optimizer.  */
>
>  static unsigned int
>  ssa_forward_propagate_and_combine (void)
>  {
>    basic_block bb;
>    unsigned int todoflags = 0;
>
>    cfg_changed = false;
> @@ -2958,20 +3033,23 @@ ssa_forward_propagate_and_combine (void)
>                   }
>                 else if (code == VEC_PERM_EXPR)
>                   {
>                     int did_something = simplify_permutation (&gsi);
>                     if (did_something == 2)
>                       cfg_changed = true;
>                     changed = did_something != 0;
>                   }
>                 else if (code == BIT_FIELD_REF)
>                   changed = simplify_bitfield_ref (&gsi);
> +                else if (code == CONSTRUCTOR
> +                         && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE)
> +                  changed = simplify_vector_constructor (&gsi);
>                 break;
>               }
>
>             case GIMPLE_SWITCH:
>               changed = simplify_gimple_switch (stmt);
>               break;
>
>             case GIMPLE_COND:
>               {
>                 int did_something;
>
Marc Glisse Sept. 11, 2012, 2:29 p.m. UTC | #2
On Tue, 11 Sep 2012, Richard Guenther wrote:

> On Tue, Sep 11, 2012 at 1:07 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> Hello,
>>
>> here is a patch that turns {v[1],v[0]} into vec_perm_expr(v,v,{1,0}) if the
>> target is ok with it.
>>
>> I am attaching 2 versions of the patch. p-good is the one that passes
>> testing. p-bad, where I rely on fold_stmt to detect identity permutations,
>> ICEs towards the end of the pass while checking a bogus gimple stmt (one
>> that gimple_debug_stmt crashes on if I call it in gdb). From a performance
>> point of view, p-good makes sense, but I liked the simplicity of p-bad and I
>> am confused as to why it fails.
>
> Probably because you cannot simply increase num_ops ...

Ah... thanks, it makes sense now... For some reason I thought it was a 
fixed size structure and num_ops just told it how many of the fields were 
in use.

[...]
> Ok with that change.

Just to be sure, that means you prefer the version where I manually detect 
identity and don't call fold, right?

Thank you for all the quick reviews,
diff mbox

Patch

Index: Makefile.in
===================================================================
--- Makefile.in	(revision 191173)
+++ Makefile.in	(working copy)
@@ -2237,21 +2237,22 @@  tree-outof-ssa.o : tree-outof-ssa.c $(TR
    $(TREE_H) $(DIAGNOSTIC_H) $(TM_H) coretypes.h dumpfile.h \
    $(TREE_SSA_LIVE_H) $(BASIC_BLOCK_H) $(BITMAP_H) $(GGC_H) \
    $(EXPR_H) $(SSAEXPAND_H) $(GIMPLE_PRETTY_PRINT_H)
 tree-ssa-dse.o : tree-ssa-dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) domwalk.h $(FLAGS_H) \
    $(GIMPLE_PRETTY_PRINT_H) langhooks.h
 tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
-   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H)
+   langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
+   $(TREE_VECTORIZER_H)
 tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    langhooks.h $(FLAGS_H) $(GIMPLE_PRETTY_PRINT_H)
 tree-ssa-ifcombine.o : tree-ssa-ifcombine.c $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h $(TM_H) $(TREE_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    $(TREE_PRETTY_PRINT_H)
 tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
Index: testsuite/gcc.dg/tree-ssa/forwprop-22.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/forwprop-22.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/forwprop-22.c	(revision 0)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_perm } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+typedef double vec __attribute__((vector_size (2 * sizeof (double))));
+void f (vec *px, vec *y, vec *z)
+{
+  vec x = *px;
+  vec t1 = { x[1], x[0] };
+  vec t2 = { x[0], x[1] };
+  *y = t1;
+  *z = t2;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-22.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: tree-ssa-forwprop.c
===================================================================
--- tree-ssa-forwprop.c	(revision 191173)
+++ tree-ssa-forwprop.c	(working copy)
@@ -26,20 +26,21 @@  along with GCC; see the file COPYING3.
 #include "tm_p.h"
 #include "basic-block.h"
 #include "gimple-pretty-print.h"
 #include "tree-flow.h"
 #include "tree-pass.h"
 #include "langhooks.h"
 #include "flags.h"
 #include "gimple.h"
 #include "expr.h"
 #include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 /* This pass propagates the RHS of assignment statements into use
    sites of the LHS of the assignment.  It's basically a specialized
    form of tree combination.   It is hoped all of this can disappear
    when we have a generalized tree combiner.
 
    One class of common cases we handle is forward propagating a single use
    variable into a COND_EXPR.
 
      bb0:
@@ -2787,20 +2788,105 @@  simplify_permutation (gimple_stmt_iterat
       if (TREE_CODE (op0) == SSA_NAME)
 	ret = remove_prop_source_from_use (op0);
       if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
 	ret |= remove_prop_source_from_use (op1);
       return ret ? 2 : 1;
     }
 
   return 0;
 }
 
+/* Recognize a vector CONSTRUCTOR whose elements are all BIT_FIELD_REFs of
+   one SSA vector and replace it with that vector (identity selection) or
+   with a VEC_PERM_EXPR of it.  Returns true if there were any changes.  */
+
+static bool
+simplify_vector_constructor (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  gimple def_stmt;
+  tree op, op2, orig, type, elem_type;
+  unsigned elem_size, nelts, i;
+  enum tree_code code;
+  constructor_elt *elt;
+  unsigned char *sel;
+  bool maybe_ident;
+
+  gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
+
+  op = gimple_assign_rhs1 (stmt);
+  type = TREE_TYPE (op);
+  gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE);
+
+  nelts = TYPE_VECTOR_SUBPARTS (type);
+  elem_type = TREE_TYPE (type);
+  elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+
+  sel = XALLOCAVEC (unsigned char, nelts);
+  orig = NULL;
+  maybe_ident = true;
+  FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (op), i, elt)
+    {
+      tree ref, op1;
+
+      if (i >= nelts)
+	return false;
+
+      if (TREE_CODE (elt->value) != SSA_NAME)
+	return false;
+      def_stmt = SSA_NAME_DEF_STMT (elt->value);
+      if (!def_stmt || !is_gimple_assign (def_stmt))
+	return false;
+      code = gimple_assign_rhs_code (def_stmt);
+      if (code != BIT_FIELD_REF)
+	return false;
+      op1 = gimple_assign_rhs1 (def_stmt);
+      ref = TREE_OPERAND (op1, 0);
+      if (orig)
+	{
+	  if (ref != orig)
+	    return false;
+	}
+      else
+	{
+	  /* The source must be an SSA name of a type compatible with TYPE.  */
+	  if (TREE_CODE (ref) != SSA_NAME
+	      || !useless_type_conversion_p (type, TREE_TYPE (ref)))
+	    return false;
+	  orig = ref;
+	}
+      if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size)
+	return false;
+      sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
+      if (sel[i] != i)
+	maybe_ident = false;
+    }
+  /* A constructor with fewer than NELTS elements is not handled.  */
+  if (i < nelts)
+    return false;
+
+  /* Replace the RHS through GSI: gimple_set_num_ops cannot grow a statement,
+     whereas these helpers re-allocate it when more operands are needed.  */
+  if (maybe_ident)
+    gimple_assign_set_rhs_from_tree (gsi, orig);
+  else
+    {
+      /* Give up if no permutation mask can be built for SEL.  */
+      op2 = vect_gen_perm_mask (type, sel);
+      if (!op2)
+	return false;
+      gimple_assign_set_rhs_with_ops_1 (gsi, VEC_PERM_EXPR, orig, orig, op2);
+    }
+  update_stmt (gsi_stmt (*gsi));
+  return true;
+}
+
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;
 
   cfg_changed = false;
@@ -2958,20 +3044,23 @@  ssa_forward_propagate_and_combine (void)
 		  }
 		else if (code == VEC_PERM_EXPR)
 		  {
 		    int did_something = simplify_permutation (&gsi);
 		    if (did_something == 2)
 		      cfg_changed = true;
 		    changed = did_something != 0;
 		  }
 		else if (code == BIT_FIELD_REF)
 		  changed = simplify_bitfield_ref (&gsi);
+                else if (code == CONSTRUCTOR
+                         && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE)
+                  changed = simplify_vector_constructor (&gsi);
 		break;
 	      }
 
 	    case GIMPLE_SWITCH:
 	      changed = simplify_gimple_switch (stmt);
 	      break;
 
 	    case GIMPLE_COND:
 	      {
 		int did_something;