diff mbox series

tree-optimization/98544 - more permute optimization fixes

Message ID nycvar.YFH.7.76.2101081408530.707@elmra.sevgm.obk
State New
Headers show
Series tree-optimization/98544 - more permute optimization fixes | expand

Commit Message

Richard Biener Jan. 8, 2021, 1:09 p.m. UTC
Permute nodes are not transparent to the permutes of their children.
Instead we always have to materialize child permutes at a permute
node.  In the future we may treat permute nodes as the source of
arbitrary permutes, since we can permute the lane permutation vector
at will (as far as the target supports the result in the end).

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-01-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/98544
	* tree-vect-slp.c (vect_optimize_slp): Always materialize
	permutes at a permute node.

	* gcc.dg/vect/bb-slp-pr98544.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr98544.c | 32 ++++++++++++++++++++
 gcc/tree-vect-slp.c                        | 34 +++++++++++++---------
 2 files changed, 53 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr98544.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr98544.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr98544.c
new file mode 100644
index 00000000000..756dc02ebad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr98544.c
@@ -0,0 +1,32 @@ 
+/* { dg-do run } */
+
+double a[2], b[2], c[2], d[2];
+
+void __attribute__((noipa)) /* noipa: no interprocedural optimization.  */
+foo() /* Fill d[] from a[], b[], c[] with the two lanes crossed.  */
+{
+  double a0 = a[0];
+  double a1 = a[1];
+  double b0 = b[0];
+  double b1 = b[1];
+  double c0 = c[0];
+  double c1 = c[1];
+  double tem1 = a1 - b1; /* Lane 1 of a and b: subtraction.  */
+  double tem2 = a0 + b0; /* Lane 0 of a and b: addition.  */
+  d[0] = tem1 * c1; /* d[0] = (a[1] - b[1]) * c[1].  */
+  d[1] = tem2 * c0; /* d[1] = (a[0] + b[0]) * c[0].  */
+}
+
+int main() /* Run foo on fixed inputs; abort if d[] is miscomputed.  */
+{
+  a[0] = 1.;
+  a[1] = 2.;
+  b[0] = 3.;
+  b[1] = 4.;
+  c[0] = 2.;
+  c[1] = 3.;
+  foo ();
+  if (d[0] != -6. || d[1] != 8.) /* (2-4)*3 == -6, (1+3)*2 == 8.  */
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index c9da8457e5e..e0f3539aa54 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3029,19 +3029,27 @@  vect_optimize_slp (vec_info *vinfo)
 
 	  /* Decide on permute materialization.  Look whether there's
 	     a use (pred) edge that is permuted differently than us.
-	     In that case mark ourselves so the permutation is applied.  */
-	  bool all_preds_permuted = slpg->vertices[idx].pred != NULL;
-	  for (graph_edge *pred = slpg->vertices[idx].pred;
-	       pred; pred = pred->pred_next)
-	    {
-	      gcc_checking_assert (bitmap_bit_p (n_visited, pred->src));
-	      int pred_perm = n_perm[pred->src];
-	      if (!vect_slp_perms_eq (perms, perm, pred_perm))
-		{
-		  all_preds_permuted = false;
-		  break;
-		}
-	    }
+	     In that case mark ourselves so the permutation is applied.
+	     For VEC_PERM_EXPRs the permutation doesn't carry along
+	     from children to parents so force materialization at the
+	     point of the VEC_PERM_EXPR.  In principle VEC_PERM_EXPRs
+	     are a source of an arbitrary permutation again, similar
+	     to constants/externals - that's something we do not yet
+	     optimally handle.  */
+	  bool all_preds_permuted = (SLP_TREE_CODE (node) != VEC_PERM_EXPR
+				     && slpg->vertices[idx].pred != NULL);
+	  if (all_preds_permuted)
+	    for (graph_edge *pred = slpg->vertices[idx].pred;
+		 pred; pred = pred->pred_next)
+	      {
+		gcc_checking_assert (bitmap_bit_p (n_visited, pred->src));
+		int pred_perm = n_perm[pred->src];
+		if (!vect_slp_perms_eq (perms, perm, pred_perm))
+		  {
+		    all_preds_permuted = false;
+		    break;
+		  }
+	      }
 	  if (!all_preds_permuted)
 	    {
 	      if (!bitmap_bit_p (n_materialize, idx))