diff mbox series

Fix PR92166

Message ID nycvar.YFH.7.76.1910221504520.5566@zhemvz.fhfr.qr
State New
Headers show
Series Fix PR92166 | expand

Commit Message

Richard Biener Oct. 22, 2019, 1:06 p.m. UTC
The following patch fixes an ICE when vectorizing shifts with the
simplified SLP operand code by adjusting the type of the shift
argument in vectorizable_shift.  I also took the liberty of enabling
more SLP shift vectorization for shift amounts that were originally
not "scalar" (i.e. not all the same) but are constant, since we
already handle that case as a fallback anyway.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-10-22  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92166
	* tree-vect-slp.c (vect_get_and_check_slp_defs): Demote a
	vect_constant_def operand to vect_external_def if there is
	at least one vect_external_def.
	(vect_print_slp_tree): Dump scalar ops.
	* tree-vect-stmts.c (vectorizable_shift): For SLP shifts
	with constant shift amount convert the scalars to the desired
	vector component type.

	* gcc.dg/vshift-5.c: Amend.
diff mbox series

Patch

Index: gcc/testsuite/gcc.dg/vshift-5.c
===================================================================
--- gcc/testsuite/gcc.dg/vshift-5.c	(revision 277280)
+++ gcc/testsuite/gcc.dg/vshift-5.c	(working copy)
@@ -41,6 +41,42 @@  f2 (void)
 }
 
 __attribute__((noinline, noclone)) void
+f2a (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2b (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << x;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
 f3 (int x)
 {
   long long a0, a1, a2, a3;
@@ -77,5 +113,13 @@  main ()
   if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
       || a[2] != (2LL << 9) || a[3] != (1LL << 10))
     abort ();
+  f2a (3);
+  if (a[0] != (4LL << 10) || a[1] != (3LL << 10)
+      || a[2] != (2LL << 11) || a[3] != (1LL << 12))
+    abort ();
+  f2b (3);
+  if (a[0] != (4LL << 12) || a[1] != (3LL << 12)
+      || a[2] != (2LL << 14) || a[3] != (1LL << 14))
+    abort ();
   return 0;
 }
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 277280)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -475,8 +475,11 @@  again:
       /* Check the types of the definitions.  */
       switch (dt)
 	{
-	case vect_constant_def:
 	case vect_external_def:
+	  /* Make sure to demote the overall operand to external.  */
+	  oprnd_info->first_dt = vect_external_def;
+	  /* Fallthru.  */
+	case vect_constant_def:
 	  oprnd_info->def_stmts.quick_push (NULL);
 	  oprnd_info->ops.quick_push (oprnd);
 	  break;
@@ -1504,9 +1507,10 @@  static void
 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
 		     slp_tree node, hash_set<slp_tree> &visited)
 {
-  int i;
+  unsigned i;
   stmt_vec_info stmt_info;
   slp_tree child;
+  tree op;
 
   if (visited.add (node))
     return;
@@ -1514,11 +1518,23 @@  vect_print_slp_tree (dump_flags_t dump_k
   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
   dump_user_location_t user_loc = loc.get_user_location ();
   dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
-		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
-		   ? " (external)" : "", node,
+		   SLP_TREE_DEF_TYPE (node) == vect_external_def
+		   ? " (external)"
+		   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
+		      ? " (constant)"
+		      : ""), node,
 		   estimated_poly_value (node->max_nunits));
-  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
-    dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
+  if (SLP_TREE_SCALAR_STMTS (node).exists ())
+    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
+      dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
+  else
+    {
+      dump_printf_loc (metadata, user_loc, "\t{ ");
+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
+	dump_printf (metadata, "%T%s ", op,
+		     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
+      dump_printf (metadata, "}\n");
+    }
   if (SLP_TREE_CHILDREN (node).is_empty ())
     return;
   dump_printf_loc (metadata, user_loc, "\tchildren");
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 277280)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -5670,8 +5670,11 @@  vectorizable_shift (stmt_vec_info stmt_i
 
       if (!op1_vectype)
 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
-      if (op1_vectype == NULL_TREE
-	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+      if ((op1_vectype == NULL_TREE
+	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+	  && (!slp_node
+	      || SLP_TREE_DEF_TYPE
+		   (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5710,7 +5713,10 @@  vectorizable_shift (stmt_vec_info stmt_i
                  so make sure the scalar is the right type if we are
 		 dealing with vectors of long long/long/short/char.  */
               if (dt[1] == vect_constant_def)
-                op1 = fold_convert (TREE_TYPE (vectype), op1);
+		{
+		  if (!slp_node)
+		    op1 = fold_convert (TREE_TYPE (vectype), op1);
+		}
 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
 						   TREE_TYPE (op1)))
 		{
@@ -5821,6 +5827,23 @@  vectorizable_shift (stmt_vec_info stmt_i
                     }
                 }
             }
+	  else if (slp_node
+		   && !useless_type_conversion_p (TREE_TYPE (vectype),
+						  TREE_TYPE (op1)))
+	    {
+	      /* Convert the scalar constant shift amounts in-place.  */
+	      slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
+	      gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
+	      for (unsigned i = 0;
+		   i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
+		{
+		  SLP_TREE_SCALAR_OPS (shift)[i]
+		    = fold_convert (TREE_TYPE (vectype),
+				    SLP_TREE_SCALAR_OPS (shift)[i]);
+		  gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
+			       == INTEGER_CST));
+		}
+	    }
 
           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
              (a special case for certain kind of vector shifts); otherwise,