diff mbox

combine vec_perm_expr with constructor

Message ID alpine.DEB.2.02.1208252133130.20769@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse Aug. 25, 2012, 8:54 p.m. UTC
Hello,

this patch (bootstrapped and regtested on x86_64) deals with the same 
issue as the one at:

http://gcc.gnu.org/ml/gcc-patches/2012-08/msg00205.html

that is combining a shuffle of a constructor into a constructor, but at 
the tree-ssa level. An advantage is that it works with any size of vectors 
(the RTL patch only handles size 2 IIRC). A drawback is that it only 
applies to __builtin_shuffle, not the builtins that the x86 front-end 
uses.

Note that fold already knew this optimization (my thanks to whoever wrote 
it, it helped a lot), it just never got a chance to apply it.

In the call to fold_ternary, I am not sure if TREE_TYPE(op0) is the right 
argument, I could also use the type of the lhs, I don't know if that would 
make any difference.


While I am here, I would like to write a patch that converts w={v[1],v[0]} 
to a builtin_shuffle (under the same conditions where a builtin_shuffle is 
not lowered to a constructor of elements, obviously). Is forwprop still 
the right pass to add it, or is there another more relevant one?


2012-08-22  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* tree-ssa-forwprop.c (simplify_permutation): Handle CONSTRUCTOR.

gcc/testsuite/
  	* gcc.dg/tree-ssa/forwprop-20.c: New testcase.

Comments

Richard Biener Sept. 3, 2012, 1:40 p.m. UTC | #1
On Sat, Aug 25, 2012 at 10:54 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this patch (bootstrapped and regtested on x86_64) deals with the same issue
> as the one at:
>
> http://gcc.gnu.org/ml/gcc-patches/2012-08/msg00205.html
>
> that is combining a shuffle of a constructor into a constructor, but at the
> tree-ssa level. An advantage is that it works with any size of vectors (the
> RTL patch only handles size 2 IIRC). A drawback is that it only applies to
> __builtin_shuffle, not the builtins that the x86 front-end uses.
>
> Note that fold already knew this optimization (my thanks to whoever wrote
> it, it helped a lot), it just never got a chance to apply it.
>
> In the call to fold_ternary, I am not sure if TREE_TYPE(op0) is the right
> argument, I could also use the type of the lhs, I don't know if that would
> make any difference.
>
>
> While I am here, I would like to write a patch that converts w={v[1],v[0]}
> to a builtin_shuffle (under the same conditions where a builtin_shuffle is
> not lowered to a constructor of elements, obviously). Is forwprop still the
> right pass to add it, or is there another more relevant one?
>
>
> 2012-08-22  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * tree-ssa-forwprop.c (simplify_permutation): Handle CONSTRUCTOR.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/forwprop-20.c: New testcase.
>
> --
> Marc Glisse
> Index: testsuite/gcc.dg/tree-ssa/forwprop-20.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/forwprop-20.c     (revision 0)
> +++ testsuite/gcc.dg/tree-ssa/forwprop-20.c     (revision 0)
> @@ -0,0 +1,70 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target double64 } */
> +/* { dg-options "-O2 -fdump-tree-optimized" }  */
> +
> +#include <stdint.h>
> +
> +/* All of these optimizations happen for unsupported vector modes as a
> +   consequence of the lowering pass. We need to test with a vector mode
> +   that is supported by default on at least some architectures, or make
> +   the test target specific so we can pass a flag like -mavx.  */
> +
> +typedef double vecf __attribute__ ((vector_size (2 * sizeof (double))));
> +typedef int64_t veci __attribute__ ((vector_size (2 * sizeof (int64_t))));
> +
> +void f (double d, vecf* r)
> +{
> +  vecf x = { -d, 5 };
> +  vecf y = {  1, 4 };
> +  veci m = {  2, 0 };
> +  *r = __builtin_shuffle (x, y, m); // { 1, -d }
> +}
> +
> +void g (float d, vecf* r)
> +{
> +  vecf x = { d, 5 };
> +  vecf y = { 1, 4 };
> +  veci m = { 2, 1 };
> +  *r = __builtin_shuffle (x, y, m); // { 1, 5 }
> +}
> +
> +void h (double d, vecf* r)
> +{
> +  vecf x = { d + 1, 5 };
> +  vecf y = {   1  , 4 };
> +  veci m = {   2  , 0 };
> +  *r = __builtin_shuffle (y, x, m); // { d + 1, 1 }
> +}
> +
> +void i (float d, vecf* r)
> +{
> +  vecf x = { d, 5 };
> +  veci m = { 1, 0 };
> +  *r = __builtin_shuffle (x, m); // { 5, d }
> +}
> +
> +void j (vecf* r)
> +{
> +  vecf y = {  1, 2 };
> +  veci m = {  0, 0 };
> +  *r = __builtin_shuffle (y, m); // { 1, 1 }
> +}
> +
> +void k (vecf* r)
> +{
> +  vecf x = {  3, 4 };
> +  vecf y = {  1, 2 };
> +  veci m = {  3, 0 };
> +  *r = __builtin_shuffle (x, y, m); // { 2, 3 }
> +}
> +
> +void l (double d, vecf* r)
> +{
> +  vecf x = { -d, 5 };
> +  vecf y = {  d, 4 };
> +  veci m = {  2, 0 };
> +  *r = __builtin_shuffle (x, y, m); // { d, -d }
> +}
> +
> +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-20.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: tree-ssa-forwprop.c
> ===================================================================
> --- tree-ssa-forwprop.c (revision 190666)
> +++ tree-ssa-forwprop.c (working copy)
> @@ -2602,75 +2602,130 @@ is_combined_permutation_identity (tree m
>        if (j == i)
>         maybe_identity2 = false;
>        else if (j == i + nelts)
>         maybe_identity1 = false;
>        else
>         return 0;
>      }
>    return maybe_identity1 ? 1 : maybe_identity2 ? 2 : 0;
>  }
>
> -/* Combine two shuffles in a row.  Returns 1 if there were any changes
> -   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
> +/* Combine a shuffle with its arguments.  Returns 1 if there were any
> +   changes made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
>
>  static int
>  simplify_permutation (gimple_stmt_iterator *gsi)
>  {
>    gimple stmt = gsi_stmt (*gsi);
>    gimple def_stmt;
> -  tree op0, op1, op2, op3;
> -  enum tree_code code = gimple_assign_rhs_code (stmt);
> -  enum tree_code code2;
> +  tree op0, op1, op2, op3, arg0, arg1;
> +  enum tree_code code;
>
> -  gcc_checking_assert (code == VEC_PERM_EXPR);
> +  gcc_checking_assert (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR);
>
>    op0 = gimple_assign_rhs1 (stmt);
>    op1 = gimple_assign_rhs2 (stmt);
>    op2 = gimple_assign_rhs3 (stmt);
>
> -  if (TREE_CODE (op0) != SSA_NAME)
> -    return 0;
> -
>    if (TREE_CODE (op2) != VECTOR_CST)
>      return 0;
>
> -  if (op0 != op1)
> -    return 0;
> +  if (TREE_CODE (op0) == VECTOR_CST)
> +    {
> +      code = VECTOR_CST;
> +      arg0 = op0;

You shouldn't need the VECTOR_CST handling - constant propagation should
already ensure properly simplified code here (and is the more canonical place
to handle this).

> +    }
> +  else if (TREE_CODE (op0) == SSA_NAME)
> +    {
> +      def_stmt = SSA_NAME_DEF_STMT (op0);
> +      if (!def_stmt || !is_gimple_assign (def_stmt)
> +         || !can_propagate_from (def_stmt))
> +       return 0;
>
> -  def_stmt = SSA_NAME_DEF_STMT (op0);
> -  if (!def_stmt || !is_gimple_assign (def_stmt)
> -      || !can_propagate_from (def_stmt))
> +      code = gimple_assign_rhs_code (def_stmt);
> +      arg0 = gimple_assign_rhs1 (def_stmt);
> +    }
> +  else
>      return 0;
>
> -  code2 = gimple_assign_rhs_code (def_stmt);
> -
>    /* Two consecutive shuffles.  */
> -  if (code2 == VEC_PERM_EXPR)
> +  if (code == VEC_PERM_EXPR)
>      {
>        tree orig;
>        int ident;
> +
> +      if (op0 != op1)
> +       return 0;
>        op3 = gimple_assign_rhs3 (def_stmt);
>        if (TREE_CODE (op3) != VECTOR_CST)
>         return 0;
>        ident = is_combined_permutation_identity (op3, op2);
>        if (!ident)
>         return 0;
>        orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
>                           : gimple_assign_rhs2 (def_stmt);
>        gimple_assign_set_rhs1 (stmt, unshare_expr (orig));
>        gimple_assign_set_rhs_code (stmt, TREE_CODE (orig));
>        gimple_set_num_ops (stmt, 2);
>        update_stmt (stmt);
>        return remove_prop_source_from_use (op0) ? 2 : 1;
>      }
>
> -  return false;
> +  /* Shuffle of a constructor.  */
> +  else if (code == CONSTRUCTOR || code == VECTOR_CST)
> +    {
> +      tree opt;
> +      bool ret = false;
> +      if (op0 != op1)
> +       {
> +         if (TREE_CODE (op1) == VECTOR_CST)
> +           arg1 = op1;
> +         else if (TREE_CODE (op1) == SSA_NAME)
> +           {
> +             enum tree_code code2;
> +             gimple def_stmt2 = SSA_NAME_DEF_STMT (op1);
> +             if (!def_stmt2 || !is_gimple_assign (def_stmt2)
> +                 || !can_propagate_from (def_stmt2))
> +               return 0;
> +
> +             code2 = gimple_assign_rhs_code (def_stmt2);
> +             if (code2 != CONSTRUCTOR && code2 != VECTOR_CST)
> +               return 0;
> +             arg1 = gimple_assign_rhs1 (def_stmt2);
> +           }
> +         else
> +           return 0;
> +
> +         if ((TREE_CODE (op0) == SSA_NAME && !has_single_use (op0))
> +             || (TREE_CODE (op1) == SSA_NAME && !has_single_use (op1)))
> +           return 0;

You do work above and then bail late here.  Always do early exists early
to reduce useless compile-time.

> +       }
> +      else
> +       {
> +         /* Already used twice in this statement.  */
> +         if (TREE_CODE (op0) == SSA_NAME && num_imm_uses (op0) > 2)
> +           return 0;
> +         arg1 = arg0;
> +       }
> +      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1, op2);
> +      if (!opt)
> +       return 0;
> +      gimple_assign_set_rhs_from_tree (gsi, opt);

You need to verify that fold_ternary returns something that is valid GIMPLE.
fold () in general happily returns trees that are in the need of
re-gimplification.
You expect a CONSTRUCTOR or VECTOR_CST here, so you should check
for that.

Richard.

> +      update_stmt (gsi_stmt (*gsi));
> +      if (TREE_CODE (op0) == SSA_NAME)
> +       ret = remove_prop_source_from_use (op0);
> +      if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
> +       ret |= remove_prop_source_from_use (op1);
> +      return ret ? 2 : 1;
> +    }
> +
> +  return 0;
>  }
>
>  /* Main entry point for the forward propagation and statement combine
>     optimizer.  */
>
>  static unsigned int
>  ssa_forward_propagate_and_combine (void)
>  {
>    basic_block bb;
>    unsigned int todoflags = 0;
>
Marc Glisse Sept. 3, 2012, 2 p.m. UTC | #2
On Mon, 3 Sep 2012, Richard Guenther wrote:

> You shouldn't need the VECTOR_CST handling - constant propagation should
> already ensure properly simplified code here (and is the more canonical place
> to handle this).

IIRC, I added VECTOR_CST because of mixed constructor/vector_cst shuffles 
(and because it wasn't too hard). If I remove it (I can), I guess some of 
the testcases won't work anymore.

> You do work above and then bail late here.  Always do early exists early
> to reduce useless compile-time.

Ok.

>> +      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1, op2);
>> +      if (!opt)
>> +       return 0;
>> +      gimple_assign_set_rhs_from_tree (gsi, opt);
>
> You need to verify that fold_ternary returns something that is valid GIMPLE.
> fold () in general happily returns trees that are in the need of
> re-gimplification.
> You expect a CONSTRUCTOR or VECTOR_CST here, so you should check
> for that.

Ok.

Thank you for the reviews,
Richard Biener Sept. 3, 2012, 3:17 p.m. UTC | #3
On Mon, Sep 3, 2012 at 4:00 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 3 Sep 2012, Richard Guenther wrote:
>
>> You shouldn't need the VECTOR_CST handling - constant propagation should
>> already ensure properly simplified code here (and is the more canonical
>> place
>> to handle this).
>
>
> IIRC, I added VECTOR_CST because of mixed constructor/vector_cst shuffles
> (and because it wasn't too hard). If I remove it (I can), I guess some of
> the testcases won't work anymore.

I see.  If you still have a testcase can you look if CCP does not do something
it should?

>
>> You do work above and then bail late here.  Always do early exists early
>> to reduce useless compile-time.
>
>
> Ok.
>
>
>>> +      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1,
>>> op2);
>>> +      if (!opt)
>>> +       return 0;
>>> +      gimple_assign_set_rhs_from_tree (gsi, opt);
>>
>>
>> You need to verify that fold_ternary returns something that is valid
>> GIMPLE.
>> fold () in general happily returns trees that are in the need of
>> re-gimplification.
>> You expect a CONSTRUCTOR or VECTOR_CST here, so you should check
>> for that.
>
>
> Ok.
>
> Thank you for the reviews,
>
> --
> Marc Glisse
Marc Glisse Sept. 3, 2012, 3:50 p.m. UTC | #4
On Mon, 3 Sep 2012, Richard Guenther wrote:

> On Mon, Sep 3, 2012 at 4:00 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> On Mon, 3 Sep 2012, Richard Guenther wrote:
>>
>>> You shouldn't need the VECTOR_CST handling - constant propagation should
>>> already ensure properly simplified code here (and is the more canonical
>>> place
>>> to handle this).
>>
>>
>> IIRC, I added VECTOR_CST because of mixed constructor/vector_cst shuffles
>> (and because it wasn't too hard). If I remove it (I can), I guess some of
>> the testcases won't work anymore.
>
> I see.  If you still have a testcase can you look if CCP does not do 
> something it should?

I think CCP is working fine, the fold_ternary patch you approved today 
tests some of that (without that patch, sometimes ccp1 does half the work 
and fre1 finishes it, and since forwprop1 is before fre1, I hit that 
case there). Is there a particular scenario you have in mind that might 
not be handled?

Here I was concerned with:
x={a,b}; // constructor
y={18,42}; // vector_cst
m={0,3};
__builtin_shuffle(x,y,m) // should be {a,42}
Richard Biener Sept. 4, 2012, 10:14 a.m. UTC | #5
On Mon, Sep 3, 2012 at 5:50 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 3 Sep 2012, Richard Guenther wrote:
>
>> On Mon, Sep 3, 2012 at 4:00 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>>>
>>> On Mon, 3 Sep 2012, Richard Guenther wrote:
>>>
>>>> You shouldn't need the VECTOR_CST handling - constant propagation should
>>>> already ensure properly simplified code here (and is the more canonical
>>>> place
>>>> to handle this).
>>>
>>>
>>>
>>> IIRC, I added VECTOR_CST because of mixed constructor/vector_cst shuffles
>>> (and because it wasn't too hard). If I remove it (I can), I guess some of
>>> the testcases won't work anymore.
>>
>>
>> I see.  If you still have a testcase can you look if CCP does not do
>> something it should?
>
>
> I think CCP is working fine, the fold_ternary patch you approved today tests
> some of that (without that patch, sometimes ccp1 does half the work and fre1
> finishes it, and since forwprop1 is before fre1, I hit that case there). Is
> there a particular scenario you have in mind that might not be handled?

No.  In theory it should be handled just fine via gimple_fold_stmt_to_constant_1
dispatching to fold_ternary.

Richard.

> Here I was concerned with:
> x={a,b}; // constructor
> y={18,42}; // vector_cst
> m={0,3};
> __builtin_shuffle(x,y,m) // should be {a,42}
>
> --
> Marc Glisse
diff mbox

Patch

Index: testsuite/gcc.dg/tree-ssa/forwprop-20.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/forwprop-20.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/forwprop-20.c	(revision 0)
@@ -0,0 +1,70 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target double64 } */
+/* { dg-options "-O2 -fdump-tree-optimized" }  */
+
+#include <stdint.h>
+
+/* All of these optimizations happen for unsupported vector modes as a
+   consequence of the lowering pass. We need to test with a vector mode
+   that is supported by default on at least some architectures, or make
+   the test target specific so we can pass a flag like -mavx.  */
+
+typedef double vecf __attribute__ ((vector_size (2 * sizeof (double))));
+typedef int64_t veci __attribute__ ((vector_size (2 * sizeof (int64_t))));
+
+void f (double d, vecf* r)
+{
+  vecf x = { -d, 5 };
+  vecf y = {  1, 4 };
+  veci m = {  2, 0 };
+  *r = __builtin_shuffle (x, y, m); // { 1, -d }
+}
+
+void g (float d, vecf* r)
+{
+  vecf x = { d, 5 };
+  vecf y = { 1, 4 };
+  veci m = { 2, 1 };
+  *r = __builtin_shuffle (x, y, m); // { 1, 5 }
+}
+
+void h (double d, vecf* r)
+{
+  vecf x = { d + 1, 5 };
+  vecf y = {   1  , 4 };
+  veci m = {   2  , 0 };
+  *r = __builtin_shuffle (y, x, m); // { d + 1, 1 }
+}
+
+void i (float d, vecf* r)
+{
+  vecf x = { d, 5 };
+  veci m = { 1, 0 };
+  *r = __builtin_shuffle (x, m); // { 5, d }
+}
+
+void j (vecf* r)
+{
+  vecf y = {  1, 2 };
+  veci m = {  0, 0 };
+  *r = __builtin_shuffle (y, m); // { 1, 1 }
+}
+
+void k (vecf* r)
+{
+  vecf x = {  3, 4 };
+  vecf y = {  1, 2 };
+  veci m = {  3, 0 };
+  *r = __builtin_shuffle (x, y, m); // { 2, 3 }
+}
+
+void l (double d, vecf* r)
+{
+  vecf x = { -d, 5 };
+  vecf y = {  d, 4 };
+  veci m = {  2, 0 };
+  *r = __builtin_shuffle (x, y, m); // { d, -d }
+}
+
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-20.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: tree-ssa-forwprop.c
===================================================================
--- tree-ssa-forwprop.c	(revision 190666)
+++ tree-ssa-forwprop.c	(working copy)
@@ -2602,75 +2602,130 @@  is_combined_permutation_identity (tree m
       if (j == i)
 	maybe_identity2 = false;
       else if (j == i + nelts)
 	maybe_identity1 = false;
       else
 	return 0;
     }
   return maybe_identity1 ? 1 : maybe_identity2 ? 2 : 0;
 }
 
-/* Combine two shuffles in a row.  Returns 1 if there were any changes
-   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
+/* Combine a shuffle with its arguments.  Returns 1 if there were any
+   changes made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
  
 static int
 simplify_permutation (gimple_stmt_iterator *gsi)
 {
   gimple stmt = gsi_stmt (*gsi);
   gimple def_stmt;
-  tree op0, op1, op2, op3;
-  enum tree_code code = gimple_assign_rhs_code (stmt);
-  enum tree_code code2;
+  tree op0, op1, op2, op3, arg0, arg1;
+  enum tree_code code;
 
-  gcc_checking_assert (code == VEC_PERM_EXPR);
+  gcc_checking_assert (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR);
 
   op0 = gimple_assign_rhs1 (stmt);
   op1 = gimple_assign_rhs2 (stmt);
   op2 = gimple_assign_rhs3 (stmt);
 
-  if (TREE_CODE (op0) != SSA_NAME)
-    return 0;
-
   if (TREE_CODE (op2) != VECTOR_CST)
     return 0;
 
-  if (op0 != op1)
-    return 0;
+  if (TREE_CODE (op0) == VECTOR_CST)
+    {
+      code = VECTOR_CST;
+      arg0 = op0;
+    }
+  else if (TREE_CODE (op0) == SSA_NAME)
+    {
+      def_stmt = SSA_NAME_DEF_STMT (op0);
+      if (!def_stmt || !is_gimple_assign (def_stmt)
+	  || !can_propagate_from (def_stmt))
+	return 0;
 
-  def_stmt = SSA_NAME_DEF_STMT (op0);
-  if (!def_stmt || !is_gimple_assign (def_stmt)
-      || !can_propagate_from (def_stmt))
+      code = gimple_assign_rhs_code (def_stmt);
+      arg0 = gimple_assign_rhs1 (def_stmt);
+    }
+  else
     return 0;
 
-  code2 = gimple_assign_rhs_code (def_stmt);
-
   /* Two consecutive shuffles.  */
-  if (code2 == VEC_PERM_EXPR)
+  if (code == VEC_PERM_EXPR)
     {
       tree orig;
       int ident;
+
+      if (op0 != op1)
+	return 0;
       op3 = gimple_assign_rhs3 (def_stmt);
       if (TREE_CODE (op3) != VECTOR_CST)
 	return 0;
       ident = is_combined_permutation_identity (op3, op2);
       if (!ident)
 	return 0;
       orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
 			  : gimple_assign_rhs2 (def_stmt);
       gimple_assign_set_rhs1 (stmt, unshare_expr (orig));
       gimple_assign_set_rhs_code (stmt, TREE_CODE (orig));
       gimple_set_num_ops (stmt, 2);
       update_stmt (stmt);
       return remove_prop_source_from_use (op0) ? 2 : 1;
     }
 
-  return false;
+  /* Shuffle of a constructor.  */
+  else if (code == CONSTRUCTOR || code == VECTOR_CST)
+    {
+      tree opt;
+      bool ret = false;
+      if (op0 != op1)
+	{
+	  if (TREE_CODE (op1) == VECTOR_CST)
+	    arg1 = op1;
+	  else if (TREE_CODE (op1) == SSA_NAME)
+	    {
+	      enum tree_code code2;
+	      gimple def_stmt2 = SSA_NAME_DEF_STMT (op1);
+	      if (!def_stmt2 || !is_gimple_assign (def_stmt2)
+		  || !can_propagate_from (def_stmt2))
+		return 0;
+
+	      code2 = gimple_assign_rhs_code (def_stmt2);
+	      if (code2 != CONSTRUCTOR && code2 != VECTOR_CST)
+		return 0;
+	      arg1 = gimple_assign_rhs1 (def_stmt2);
+	    }
+	  else
+	    return 0;
+
+	  if ((TREE_CODE (op0) == SSA_NAME && !has_single_use (op0))
+	      || (TREE_CODE (op1) == SSA_NAME && !has_single_use (op1)))
+	    return 0;
+	}
+      else
+	{
+	  /* Already used twice in this statement.  */
+	  if (TREE_CODE (op0) == SSA_NAME && num_imm_uses (op0) > 2)
+	    return 0;
+	  arg1 = arg0;
+	}
+      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1, op2);
+      if (!opt)
+	return 0;
+      gimple_assign_set_rhs_from_tree (gsi, opt);
+      update_stmt (gsi_stmt (*gsi));
+      if (TREE_CODE (op0) == SSA_NAME)
+	ret = remove_prop_source_from_use (op0);
+      if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
+	ret |= remove_prop_source_from_use (op1);
+      return ret ? 2 : 1;
+    }
+
+  return 0;
 }
 
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;