Patchwork combine permutations in gimple

login
register
mail settings
Submitter Marc Glisse
Date Aug. 10, 2012, 9:40 p.m.
Message ID <alpine.DEB.2.02.1208102312390.13110@stedding.saclay.inria.fr>
Download mbox | patch
Permalink /patch/176629/
State New
Headers show

Comments

Marc Glisse - Aug. 10, 2012, 9:40 p.m.
Hello,

this patch detects permutations of permutations and merges them. It also 
canonicalizes permutations a bit more.

There are several issues with this patch:

1) I am not sure we always want to combine permutations. Indeed, someone 
(user? vectorizer?) may have written 2 permutations to help the backend 
generate optimal code, whereas it will do a bad job on the complicated 
combined permutation. At tree level, I am not sure what can be done except 
possibly restrict the optimization to the case where the combined 
permutation is the identity. At the RTL level, we could compare what insns 
are generated, but that's going to be painful (doing anything with 
permutations at the RTL level is painful).

2) I don't understand how the cleanup works in tree-ssa-forwprop.c. I 
copied some fold/update/remove lines from other simplifications, but I 
don't know if they are right.

3) In a first version of the patch, where I had SSA_NAME_DEF_STMT instead 
of get_prop_source_stmt, I got an ICE in one of the torture vectorization 
testcases. It happened in expand_expr_real_2, because it asserts that 
expand_vec_perm will never fail. However, expand_vec_perm does return 
NULL_RTX sometimes. Here it was in V16QImode, with the permutation 
{0,0,2,2,4,...,14,14}. maybe_expand_insn can't handle it directly (that's 
ok I guess), but then expand_vec_perm sees VOIDmode and exits instead of 
trying other ways. I don't know if there is a latent bug or if (more 
likely) my patch may produce trees with wrong modes.

4) Is this the right place?

This isn't the transformation I am most interested in, but it is a first 
step to see if the direction is right.


Happily passed bootstrap+regtest.

2012-08-10  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* fold-const.c (fold_ternary_loc): Detect identity permutations.
 	Canonicalize permutations more.
 	* tree-ssa-forwprop.c (simplify_permutation): New function.
 	(ssa_forward_propagate_and_combine): Call it.

gcc/testsuite/
 	* gcc.dg/tree-ssa/forwprop-19.c: New testcase.
Marc Glisse - Aug. 10, 2012, 9:47 p.m.
On Fri, 10 Aug 2012, Marc Glisse wrote:

> There are several issues with this patch:

Oups:

5) and the testcase needs fixing, just like Jakub just fixed Richard's 
testcase, to avoid ABI warnings. But that's easy.

Patch

Index: testsuite/gcc.dg/tree-ssa/forwprop-19.c

===================================================================
--- testsuite/gcc.dg/tree-ssa/forwprop-19.c	(revision 0)

+++ testsuite/gcc.dg/tree-ssa/forwprop-19.c	(revision 0)

@@ -0,0 +1,16 @@ 

+/* { dg-do compile } */

+/* { dg-options "-O -fdump-tree-forwprop1" } */

+

+typedef int vec __attribute__((vector_size (2 * sizeof (int))));

+vec f (vec x1, vec x2)

+{

+  vec m = { 2, 1 };

+  vec n = { 1, 8 };

+  vec y = __builtin_shuffle (x1, x2, n);

+  vec z = __builtin_shuffle (y, m);

+  return z;

+}

+

+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*x1.*x1.*{ 1, 0 }" "forwprop1" } } */

+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop1" } } */

+/* { dg-final { cleanup-tree-dump "forwprop1" } } */


Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-19.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: fold-const.c

===================================================================
--- fold-const.c	(revision 190301)

+++ fold-const.c	(working copy)

@@ -14148,58 +14148,96 @@  fold_ternary_loc (location_t loc, enum t

 	return fold_build2_loc (loc, PLUS_EXPR, type,
 				const_binop (MULT_EXPR, arg0, arg1), arg2);
       if (integer_zerop (arg2))
 	return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
 
       return fold_fma (loc, type, arg0, arg1, arg2);
 
     case VEC_PERM_EXPR:
       if (TREE_CODE (arg2) == VECTOR_CST)
 	{
-	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;

+	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask;

 	  unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
 	  tree t;
 	  bool need_mask_canon = false;
+	  bool all_in_vec0 = true;

+	  bool all_in_vec1 = true;

+	  bool maybe_identity = true;

+	  bool single_arg = (op0 == op1);

+	  bool changed = false;

 
+	  mask = single_arg ? (nelts - 1) : (2 * nelts - 1);

 	  gcc_assert (nelts == VECTOR_CST_NELTS (arg2));
 	  for (i = 0; i < nelts; i++)
 	    {
 	      tree val = VECTOR_CST_ELT (arg2, i);
 	      if (TREE_CODE (val) != INTEGER_CST)
 		return NULL_TREE;
 
-	      sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1);

+	      sel[i] = TREE_INT_CST_LOW (val) & mask;

 	      if (TREE_INT_CST_HIGH (val)
 		  || ((unsigned HOST_WIDE_INT)
 		      TREE_INT_CST_LOW (val) != sel[i]))
 		need_mask_canon = true;
+

+	      if (sel[i] < nelts)

+		all_in_vec1 = false;

+	      else

+		all_in_vec0 = false;

+

+	      if ((sel[i] & (nelts-1)) != i)

+		maybe_identity = false;

+	    }

+

+	  if (maybe_identity)

+	    {

+	      if (all_in_vec0)

+		return op0;

+	      if (all_in_vec1)

+		return op1;

 	    }
 
 	  if ((TREE_CODE (arg0) == VECTOR_CST
 	       || TREE_CODE (arg0) == CONSTRUCTOR)
 	      && (TREE_CODE (arg1) == VECTOR_CST
 		  || TREE_CODE (arg1) == CONSTRUCTOR))
 	    {
 	      t = fold_vec_perm (type, arg0, arg1, sel);
 	      if (t != NULL_TREE)
 		return t;
 	    }
 
+	  if (all_in_vec0)

+	    op1 = op0;

+	  else if (all_in_vec1)

+	    {

+	      op0 = op1;

+	      for (i = 0; i < nelts; i++)

+		sel[i] -= nelts;

+	      need_mask_canon = true;

+	    }

+

+	  if (op0 == op1 && !single_arg)

+	    changed = true;

+

 	  if (need_mask_canon && arg2 == op2)
 	    {
 	      tree *tsel = XALLOCAVEC (tree, nelts);
 	      tree eltype = TREE_TYPE (TREE_TYPE (arg2));
 	      for (i = 0; i < nelts; i++)
 		tsel[i] = build_int_cst (eltype, sel[i]);
-	      t = build_vector (TREE_TYPE (arg2), tsel);

-	      return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t);

+	      op2 = build_vector (TREE_TYPE (arg2), tsel);

+	      changed = true;

 	    }
+

+	  if (changed)

+	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);

 	}
       return NULL_TREE;
 
     default:
       return NULL_TREE;
     } /* switch (code) */
 }
 
 /* Perform constant folding and related simplification of EXPR.
    The related simplifications include x*1 => x, x*0 => 0, etc.,
Index: tree-ssa-forwprop.c

===================================================================
--- tree-ssa-forwprop.c	(revision 190301)

+++ tree-ssa-forwprop.c	(working copy)

@@ -2569,20 +2569,74 @@  combine_conversions (gimple_stmt_iterato

 	      gimple_assign_set_rhs_code (stmt, CONVERT_EXPR);
 	      update_stmt (stmt);
 	      return remove_prop_source_from_use (op0) ? 2 : 1;
 	    }
 	}
     }
 
   return 0;
 }
 
+/* Combine two shuffles in a row.  Returns 1 if there were any changes

+   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */

+ 

+static int

+simplify_permutation (gimple_stmt_iterator *gsi)

+{

+  gimple stmt = gsi_stmt (*gsi);

+  gimple def_stmt;

+  tree op0, op1, op2, op3, mask;

+  enum tree_code code = gimple_assign_rhs_code (stmt);

+  enum tree_code code2;

+  location_t loc = gimple_location (stmt);

+

+  gcc_checking_assert (code == VEC_PERM_EXPR);

+

+  op0 = gimple_assign_rhs1 (stmt);

+  op1 = gimple_assign_rhs2 (stmt);

+  op2 = gimple_assign_rhs3 (stmt);

+

+  if (TREE_CODE (op0) != SSA_NAME)

+    return 0;

+

+  if (TREE_CODE (op2) != VECTOR_CST)

+    return 0;

+

+  if (op0 != op1)

+    return 0;

+

+  def_stmt = get_prop_source_stmt (op0, true, NULL);

+  if (!def_stmt || !can_propagate_from (def_stmt))

+    return 0;

+

+  code2 = gimple_assign_rhs_code (def_stmt);

+

+  /* Two consecutive shuffles.  */

+  if (code2 == VEC_PERM_EXPR)

+    {

+      op3 = gimple_assign_rhs3 (def_stmt);

+      if (TREE_CODE (op3) != VECTOR_CST)

+	return 0;

+      mask = fold_build3_loc (loc, VEC_PERM_EXPR, TREE_TYPE (op3),

+			      op3, op3, op2);

+      gcc_assert (TREE_CODE (mask) == VECTOR_CST);

+      gimple_assign_set_rhs1 (stmt, gimple_assign_rhs1 (def_stmt));

+      gimple_assign_set_rhs2 (stmt, gimple_assign_rhs2 (def_stmt));

+      gimple_assign_set_rhs3 (stmt, mask);

+      fold_stmt_inplace (gsi);

+      update_stmt (stmt);

+      return remove_prop_source_from_use (op0) ? 2 : 1;

+    }

+

+  return false;

+}

+

 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;
 
   cfg_changed = false;
@@ -2731,20 +2785,27 @@  ssa_forward_propagate_and_combine (void)

 		  changed = associate_pointerplus (&gsi);
 		else if (CONVERT_EXPR_CODE_P (code)
 			 || code == FLOAT_EXPR
 			 || code == FIX_TRUNC_EXPR)
 		  {
 		    int did_something = combine_conversions (&gsi);
 		    if (did_something == 2)
 		      cfg_changed = true;
 		    changed = did_something != 0;
 		  }
+		else if (code == VEC_PERM_EXPR)

+		  {

+		    int did_something = simplify_permutation (&gsi);

+		    if (did_something == 2)

+		      cfg_changed = true;

+		    changed = did_something != 0;

+		  }

 		break;
 	      }
 
 	    case GIMPLE_SWITCH:
 	      changed = simplify_gimple_switch (stmt);
 	      break;
 
 	    case GIMPLE_COND:
 	      {
 		int did_something;