diff mbox series

Change SLP representation of reduction chains

Message ID nycvar.YFH.7.76.1910241657010.5566@zhemvz.fhfr.qr
State New
Headers show
Series Change SLP representation of reduction chains | expand

Commit Message

Richard Biener Oct. 24, 2019, 3 p.m. UTC
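Instead of the following SLP graph for a reduction chain, where the
reduction operand node holds the PHI followed by a shifted copy of the chain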

t.c:4:3: note:   node 0x3751bf0 (max_nunits=1)
t.c:4:3: note:          stmt 0 sum_24 = _5 + sum_30;
t.c:4:3: note:          stmt 1 sum_25 = _10 + sum_24;
t.c:4:3: note:          stmt 2 sum_26 = _14 + sum_25;
t.c:4:3: note:          stmt 3 sum_27 = _18 + sum_26;
t.c:4:3: note:          children 0x38eb4d0 0x374acb0
t.c:4:3: note:   node 0x38eb4d0 (max_nunits=1)
t.c:4:3: note:          stmt 0 _5 = *_4;
t.c:4:3: note:          stmt 1 _10 = *_9;
t.c:4:3: note:          stmt 2 _14 = *_13;
t.c:4:3: note:          stmt 3 _18 = *_17;
t.c:4:3: note:   node 0x374acb0 (max_nunits=1)
t.c:4:3: note:          stmt 0 sum_30 = PHI <0(5), sum_27(6)>
t.c:4:3: note:          stmt 1 sum_24 = _5 + sum_30;
t.c:4:3: note:          stmt 2 sum_25 = _10 + sum_24;
t.c:4:3: note:          stmt 3 sum_26 = _14 + sum_25;

we want the reduction operand node to repeat the PHI for every lane

t.c:4:3: note:   node 0x3d9d110 (max_nunits=1)
t.c:4:3: note:          stmt 0 sum_24 = _5 + sum_30;
t.c:4:3: note:          stmt 1 sum_25 = _10 + sum_24;
t.c:4:3: note:          stmt 2 sum_26 = _14 + sum_25;
t.c:4:3: note:          stmt 3 sum_27 = _18 + sum_26;
t.c:4:3: note:          children 0x3d9d070 0x3d9d0c0
t.c:4:3: note:   node 0x3d9d070 (max_nunits=1)
t.c:4:3: note:          stmt 0 _5 = *_4;
t.c:4:3: note:          stmt 1 _10 = *_9;
t.c:4:3: note:          stmt 2 _14 = *_13;
t.c:4:3: note:          stmt 3 _18 = *_17;
t.c:4:3: note:   node 0x3d9d0c0 (max_nunits=1)
t.c:4:3: note:          stmt 0 sum_30 = PHI <0(5), sum_27(6)>
t.c:4:3: note:          stmt 1 sum_30 = PHI <0(5), sum_27(6)>
t.c:4:3: note:          stmt 2 sum_30 = PHI <0(5), sum_27(6)>
t.c:4:3: note:          stmt 3 sum_30 = PHI <0(5), sum_27(6)>

where, by repeating the reduction PHI in every lane, we correctly represent
the reduction chain as if it had been re-associated.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-10-24  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_get_and_check_slp_defs): For reduction
	chains try harder with operand swapping and instead of
	putting a shifted chain into the reduction operands put
	a repetition of the final reduction op there as if we'd
	reassociate the expression.

	* gcc.dg/vect/slp-reduc-10a.c: New testcase.
	* gcc.dg/vect/slp-reduc-10b.c: Likewise.
	* gcc.dg/vect/slp-reduc-10c.c: Likewise.
	* gcc.dg/vect/slp-reduc-10d.c: Likewise.
	* gcc.dg/vect/slp-reduc-10e.c: Likewise.
diff mbox series

Patch

Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 277372)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -433,20 +433,35 @@  again:
 	     the def-stmt/s of the first stmt.  Allow different definition
 	     types for reduction chains: the first stmt must be a
 	     vect_reduction_def (a phi node), and the rest
-	     vect_internal_def.  */
+	     end in the reduction chain.  */
 	  tree type = TREE_TYPE (oprnd);
 	  if ((oprnd_info->first_dt != dt
 	       && !(oprnd_info->first_dt == vect_reduction_def
-		    && dt == vect_internal_def)
+		    && !STMT_VINFO_DATA_REF (stmt_info)
+		    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+		    && def_stmt_info
+		    && !STMT_VINFO_DATA_REF (def_stmt_info)
+		    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
+			== REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
 	       && !((oprnd_info->first_dt == vect_external_def
 		     || oprnd_info->first_dt == vect_constant_def)
 		    && (dt == vect_external_def
 			|| dt == vect_constant_def)))
-	      || !types_compatible_p (oprnd_info->first_op_type, type))
+	      || !types_compatible_p (oprnd_info->first_op_type, type)
+	      || (!STMT_VINFO_DATA_REF (stmt_info)
+		  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+		  && ((!def_stmt_info
+		       || STMT_VINFO_DATA_REF (def_stmt_info)
+		       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
+			   != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
+		      != (oprnd_info->first_dt != vect_reduction_def))))
 	    {
 	      /* Try swapping operands if we got a mismatch.  */
 	      if (i == commutative_op && !swapped)
 		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_NOTE, vect_location,
+				     "trying swapped operands\n");
 		  swapped = true;
 		  goto again;
 		}
@@ -484,9 +499,26 @@  again:
 	  oprnd_info->ops.quick_push (oprnd);
 	  break;
 
+	case vect_internal_def:
 	case vect_reduction_def:
+	  if (oprnd_info->first_dt == vect_reduction_def
+	      && !STMT_VINFO_DATA_REF (stmt_info)
+	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+	      && !STMT_VINFO_DATA_REF (def_stmt_info)
+	      && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
+		  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
+	    {
+	      /* For a SLP reduction chain we want to duplicate the
+	         reduction to each of the chain members.  That gets
+		 us a sane SLP graph (still the stmts are not 100%
+		 correct wrt the initial values).  */
+	      gcc_assert (!first);
+	      oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
+	      oprnd_info->ops.quick_push (oprnd_info->ops[0]);
+	      break;
+	    }
+	  /* Fallthru.  */
 	case vect_induction_def:
-	case vect_internal_def:
 	  oprnd_info->def_stmts.quick_push (def_stmt_info);
 	  oprnd_info->ops.quick_push (oprnd);
 	  break;
@@ -1182,15 +1214,8 @@  vect_build_slp_tree_2 (vec_info *vinfo,
 	  /* Else def types have to match.  */
 	  stmt_vec_info other_info;
 	  FOR_EACH_VEC_ELT (stmts, i, other_info)
-	    {
-	      /* But for reduction chains only check on the first stmt.  */
-	      if (!STMT_VINFO_DATA_REF (other_info)
-		  && REDUC_GROUP_FIRST_ELEMENT (other_info)
-		  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
-		continue;
-	      if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
-		return NULL;
-	    }
+	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
+	      return NULL;
 	}
       else
 	return NULL;
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c	(working copy)
@@ -0,0 +1,82 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)  /* Adds four values loaded via x into sum on every loop iteration.  */
+{
+  int i;
+  int sum;
+  int _1;
+  long unsigned int _2;
+  long unsigned int _3;
+  int * _4;
+  int _5;
+  __SIZETYPE__ _7;
+  __SIZETYPE__ _8;
+  int * _9;
+  int _10;
+  __SIZETYPE__ _11;
+  __SIZETYPE__ _12;
+  int * _13;
+  int _14;
+  __SIZETYPE__ _15;
+  __SIZETYPE__ _16;
+  int * _17;
+  int _18;
+
+  __BB(2,guessed_local(118111600)):
+  if (n_21(D) > 0)  /* Skip the loop entirely when n <= 0.  */
+    goto __BB5(guessed(119453778));
+  else
+    goto __BB7(guessed(14763950));
+
+  __BB(5,guessed_local(105119324)):
+  goto __BB3(precise(134217728));
+
+  __BB(3,loop_header(1),guessed_local(955630224)):
+  sum_30 = __PHI (__BB5: 0, __BB6: sum_27);  /* Reduction PHI: 0 on loop entry, sum_27 from the previous iteration.  */
+  i_32 = __PHI (__BB5: 0, __BB6: i_28);
+  _1 = i_32 * 4;
+  _2 = (long unsigned int) _1;
+  _3 = _2 * 4ul;
+  _4 = x_23(D) + _3;
+  _5 = __MEM <int> (_4);
+  sum_24 = _5 + sum_30;  /* Chain link 1: loaded value is the first operand.  */
+  _7 = _2 + 1ul;
+  _8 = _7 * 4ul;
+  _9 = x_23(D) + _8;
+  _10 = __MEM <int> (_9);
+  sum_25 = _10 + sum_24;  /* Chain link 2: loaded value is the first operand.  */
+  _11 = _2 + 2ul;
+  _12 = _11 * 4ul;
+  _13 = x_23(D) + _12;
+  _14 = __MEM <int> (_13);
+  sum_26 = _14 + sum_25;  /* Chain link 3: loaded value is the first operand.  */
+  _15 = _2 + 3ul;
+  _16 = _15 * 4ul;
+  _17 = x_23(D) + _16;
+  _18 = __MEM <int> (_17);
+  sum_27 = _18 + sum_26;  /* Chain link 4: loaded value is the first operand; feeds the PHI.  */
+  i_28 = i_32 + 1;
+  if (n_21(D) > i_28)
+    goto __BB6(guessed(119453778));
+  else
+    goto __BB8(guessed(14763950));
+
+  __BB(8,guessed_local(105119324)):
+  goto __BB4(precise(134217728));
+
+  __BB(6,guessed_local(850510900)):
+  goto __BB3(precise(134217728));
+
+  __BB(7,guessed_local(12992276)):
+  goto __BB4(precise(134217728));
+
+  __BB(4,guessed_local(118111601)):
+  sum_31 = __PHI (__BB7: 0, __BB8: sum_27);  /* Result: 0 if the loop was skipped, else the final sum.  */
+  return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c	(working copy)
@@ -0,0 +1,82 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)  /* Adds four values loaded via x into sum on every loop iteration.  */
+{
+  int i;
+  int sum;
+  int _1;
+  long unsigned int _2;
+  long unsigned int _3;
+  int * _4;
+  int _5;
+  __SIZETYPE__ _7;
+  __SIZETYPE__ _8;
+  int * _9;
+  int _100;
+  __SIZETYPE__ _11;
+  __SIZETYPE__ _12;
+  int * _13;
+  int _14;
+  __SIZETYPE__ _15;
+  __SIZETYPE__ _16;
+  int * _17;
+  int _18;
+
+  __BB(2,guessed_local(118111600)):
+  if (n_21(D) > 0)  /* Skip the loop entirely when n <= 0.  */
+    goto __BB5(guessed(119453778));
+  else
+    goto __BB7(guessed(14763950));
+
+  __BB(5,guessed_local(105119324)):
+  goto __BB3(precise(134217728));
+
+  __BB(3,loop_header(1),guessed_local(955630224)):
+  sum_30 = __PHI (__BB5: 0, __BB6: sum_27);  /* Reduction PHI: 0 on loop entry, sum_27 from the previous iteration.  */
+  i_32 = __PHI (__BB5: 0, __BB6: i_28);
+  _1 = i_32 * 4;
+  _2 = (long unsigned int) _1;
+  _3 = _2 * 4ul;
+  _4 = x_23(D) + _3;
+  _5 = __MEM <int> (_4);
+  sum_24 = _5 + sum_30;  /* Chain link 1: loaded value is the first operand.  */
+  _7 = _2 + 1ul;
+  _8 = _7 * 4ul;
+  _9 = x_23(D) + _8;
+  _100 = __MEM <int> (_9);
+  sum_25 = sum_24 + _100;  /* Chain link 2: chain value first, unlike the other links (presumably to exercise operand swapping).  */
+  _11 = _2 + 2ul;
+  _12 = _11 * 4ul;
+  _13 = x_23(D) + _12;
+  _14 = __MEM <int> (_13);
+  sum_26 = _14 + sum_25;  /* Chain link 3: loaded value is the first operand.  */
+  _15 = _2 + 3ul;
+  _16 = _15 * 4ul;
+  _17 = x_23(D) + _16;
+  _18 = __MEM <int> (_17);
+  sum_27 = _18 + sum_26;  /* Chain link 4: loaded value is the first operand; feeds the PHI.  */
+  i_28 = i_32 + 1;
+  if (n_21(D) > i_28)
+    goto __BB6(guessed(119453778));
+  else
+    goto __BB8(guessed(14763950));
+
+  __BB(8,guessed_local(105119324)):
+  goto __BB4(precise(134217728));
+
+  __BB(6,guessed_local(850510900)):
+  goto __BB3(precise(134217728));
+
+  __BB(7,guessed_local(12992276)):
+  goto __BB4(precise(134217728));
+
+  __BB(4,guessed_local(118111601)):
+  sum_31 = __PHI (__BB7: 0, __BB8: sum_27);  /* Result: 0 if the loop was skipped, else the final sum.  */
+  return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c	(working copy)
@@ -0,0 +1,82 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)  /* Adds four values loaded via x into sum on every loop iteration.  */
+{
+  int i;
+  int sum;
+  int _1;
+  long unsigned int _2;
+  long unsigned int _3;
+  int * _4;
+  int _5;
+  __SIZETYPE__ _7;
+  __SIZETYPE__ _8;
+  int * _9;
+  int _10;
+  __SIZETYPE__ _11;
+  __SIZETYPE__ _12;
+  int * _13;
+  int _100;
+  __SIZETYPE__ _15;
+  __SIZETYPE__ _16;
+  int * _17;
+  int _18;
+
+  __BB(2,guessed_local(118111600)):
+  if (n_21(D) > 0)  /* Skip the loop entirely when n <= 0.  */
+    goto __BB5(guessed(119453778));
+  else
+    goto __BB7(guessed(14763950));
+
+  __BB(5,guessed_local(105119324)):
+  goto __BB3(precise(134217728));
+
+  __BB(3,loop_header(1),guessed_local(955630224)):
+  sum_30 = __PHI (__BB5: 0, __BB6: sum_27);  /* Reduction PHI: 0 on loop entry, sum_27 from the previous iteration.  */
+  i_32 = __PHI (__BB5: 0, __BB6: i_28);
+  _1 = i_32 * 4;
+  _2 = (long unsigned int) _1;
+  _3 = _2 * 4ul;
+  _4 = x_23(D) + _3;
+  _5 = __MEM <int> (_4);
+  sum_24 = _5 + sum_30;  /* Chain link 1: loaded value is the first operand.  */
+  _7 = _2 + 1ul;
+  _8 = _7 * 4ul;
+  _9 = x_23(D) + _8;
+  _10 = __MEM <int> (_9);
+  sum_25 = _10 + sum_24;  /* Chain link 2: loaded value is the first operand.  */
+  _11 = _2 + 2ul;
+  _12 = _11 * 4ul;
+  _13 = x_23(D) + _12;
+  _100 = __MEM <int> (_13);
+  sum_26 = sum_25 + _100;  /* Chain link 3: chain value first, unlike the other links (presumably to exercise operand swapping).  */
+  _15 = _2 + 3ul;
+  _16 = _15 * 4ul;
+  _17 = x_23(D) + _16;
+  _18 = __MEM <int> (_17);
+  sum_27 = _18 + sum_26;  /* Chain link 4: loaded value is the first operand; feeds the PHI.  */
+  i_28 = i_32 + 1;
+  if (n_21(D) > i_28)
+    goto __BB6(guessed(119453778));
+  else
+    goto __BB8(guessed(14763950));
+
+  __BB(8,guessed_local(105119324)):
+  goto __BB4(precise(134217728));
+
+  __BB(6,guessed_local(850510900)):
+  goto __BB3(precise(134217728));
+
+  __BB(7,guessed_local(12992276)):
+  goto __BB4(precise(134217728));
+
+  __BB(4,guessed_local(118111601)):
+  sum_31 = __PHI (__BB7: 0, __BB8: sum_27);  /* Result: 0 if the loop was skipped, else the final sum.  */
+  return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c	(working copy)
@@ -0,0 +1,82 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)  /* Adds four values loaded via x into sum on every loop iteration.  */
+{
+  int i;
+  int sum;
+  int _1;
+  long unsigned int _2;
+  long unsigned int _3;
+  int * _4;
+  int _5;
+  __SIZETYPE__ _7;
+  __SIZETYPE__ _8;
+  int * _9;
+  int _10;
+  __SIZETYPE__ _11;
+  __SIZETYPE__ _12;
+  int * _13;
+  int _14;
+  __SIZETYPE__ _15;
+  __SIZETYPE__ _16;
+  int * _17;
+  int _100;
+
+  __BB(2,guessed_local(118111600)):
+  if (n_21(D) > 0)  /* Skip the loop entirely when n <= 0.  */
+    goto __BB5(guessed(119453778));
+  else
+    goto __BB7(guessed(14763950));
+
+  __BB(5,guessed_local(105119324)):
+  goto __BB3(precise(134217728));
+
+  __BB(3,loop_header(1),guessed_local(955630224)):
+  sum_30 = __PHI (__BB5: 0, __BB6: sum_27);  /* Reduction PHI: 0 on loop entry, sum_27 from the previous iteration.  */
+  i_32 = __PHI (__BB5: 0, __BB6: i_28);
+  _1 = i_32 * 4;
+  _2 = (long unsigned int) _1;
+  _3 = _2 * 4ul;
+  _4 = x_23(D) + _3;
+  _5 = __MEM <int> (_4);
+  sum_24 = _5 + sum_30;  /* Chain link 1: loaded value is the first operand.  */
+  _7 = _2 + 1ul;
+  _8 = _7 * 4ul;
+  _9 = x_23(D) + _8;
+  _10 = __MEM <int> (_9);
+  sum_25 = _10 + sum_24;  /* Chain link 2: loaded value is the first operand.  */
+  _11 = _2 + 2ul;
+  _12 = _11 * 4ul;
+  _13 = x_23(D) + _12;
+  _14 = __MEM <int> (_13);
+  sum_26 = _14 + sum_25;  /* Chain link 3: loaded value is the first operand.  */
+  _15 = _2 + 3ul;
+  _16 = _15 * 4ul;
+  _17 = x_23(D) + _16;
+  _100 = __MEM <int> (_17);
+  sum_27 = sum_26 + _100;  /* Chain link 4: chain value first, unlike the other links (presumably to exercise operand swapping); feeds the PHI.  */
+  i_28 = i_32 + 1;
+  if (n_21(D) > i_28)
+    goto __BB6(guessed(119453778));
+  else
+    goto __BB8(guessed(14763950));
+
+  __BB(8,guessed_local(105119324)):
+  goto __BB4(precise(134217728));
+
+  __BB(6,guessed_local(850510900)):
+  goto __BB3(precise(134217728));
+
+  __BB(7,guessed_local(12992276)):
+  goto __BB4(precise(134217728));
+
+  __BB(4,guessed_local(118111601)):
+  sum_31 = __PHI (__BB7: 0, __BB8: sum_27);  /* Result: 0 if the loop was skipped, else the final sum.  */
+  return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c	(working copy)
@@ -0,0 +1,82 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3"))
+foo (int * x, int n)  /* Adds four values loaded via x into sum on every loop iteration.  */
+{
+  int i;
+  int sum;
+  int _1;
+  long unsigned int _2;
+  long unsigned int _3;
+  int * _4;
+  int _100;
+  __SIZETYPE__ _7;
+  __SIZETYPE__ _8;
+  int * _9;
+  int _10;
+  __SIZETYPE__ _11;
+  __SIZETYPE__ _12;
+  int * _13;
+  int _14;
+  __SIZETYPE__ _15;
+  __SIZETYPE__ _16;
+  int * _17;
+  int _18;
+
+  __BB(2,guessed_local(118111600)):
+  if (n_21(D) > 0)  /* Skip the loop entirely when n <= 0.  */
+    goto __BB5(guessed(119453778));
+  else
+    goto __BB7(guessed(14763950));
+
+  __BB(5,guessed_local(105119324)):
+  goto __BB3(precise(134217728));
+
+  __BB(3,loop_header(1),guessed_local(955630224)):
+  sum_30 = __PHI (__BB5: 0, __BB6: sum_27);  /* Reduction PHI: 0 on loop entry, sum_27 from the previous iteration.  */
+  i_32 = __PHI (__BB5: 0, __BB6: i_28);
+  _1 = i_32 * 4;
+  _2 = (long unsigned int) _1;
+  _3 = _2 * 4ul;
+  _4 = x_23(D) + _3;
+  _100 = __MEM <int> (_4);
+  sum_24 = sum_30 + _100;  /* Chain link 1: PHI value first, unlike the other links (presumably to exercise operand swapping).  */
+  _7 = _2 + 1ul;
+  _8 = _7 * 4ul;
+  _9 = x_23(D) + _8;
+  _10 = __MEM <int> (_9);
+  sum_25 = _10 + sum_24;  /* Chain link 2: loaded value is the first operand.  */
+  _11 = _2 + 2ul;
+  _12 = _11 * 4ul;
+  _13 = x_23(D) + _12;
+  _14 = __MEM <int> (_13);
+  sum_26 = _14 + sum_25;  /* Chain link 3: loaded value is the first operand.  */
+  _15 = _2 + 3ul;
+  _16 = _15 * 4ul;
+  _17 = x_23(D) + _16;
+  _18 = __MEM <int> (_17);
+  sum_27 = _18 + sum_26;  /* Chain link 4: loaded value is the first operand; feeds the PHI.  */
+  i_28 = i_32 + 1;
+  if (n_21(D) > i_28)
+    goto __BB6(guessed(119453778));
+  else
+    goto __BB8(guessed(14763950));
+
+  __BB(8,guessed_local(105119324)):
+  goto __BB4(precise(134217728));
+
+  __BB(6,guessed_local(850510900)):
+  goto __BB3(precise(134217728));
+
+  __BB(7,guessed_local(12992276)):
+  goto __BB4(precise(134217728));
+
+  __BB(4,guessed_local(118111601)):
+  sum_31 = __PHI (__BB7: 0, __BB8: sum_27);  /* Result: 0 if the loop was skipped, else the final sum.  */
+  return sum_31;
+
+}
+
+/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */