diff mbox series

tree-optimization/114249 - ICE with BB reduction vectorization

Message ID 20240306092719.7B6B83858010@sourceware.org
State New
Headers show
Series tree-optimization/114249 - ICE with BB reduction vectorization | expand

Commit Message

Richard Biener March 6, 2024, 9:26 a.m. UTC
When we scrap the last def of a BB reduction with an odd number of
lanes we can end up recording a pattern def which will later wreck
code generation.  The following moves the logic that makes the lane
count even to where it better belongs, avoiding this issue.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	PR tree-optimization/114249
	* tree-vect-slp.cc (vect_build_slp_instance): Move making
	a BB reduction lane number even ...
	(vect_slp_check_for_roots): ... here to avoid leaking
	pattern defs.

	* gcc.dg/vect/bb-slp-pr114249.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c | 20 ++++++++++++++++++++
 gcc/tree-vect-slp.cc                        | 20 ++++++++++----------
 2 files changed, 30 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
new file mode 100644
index 00000000000..64c93cd9a2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+
+enum { SEG_THIN_POOL } read_only; /* Anonymous-enum global, no initializer.  */
+struct {
+  unsigned skip_block_zeroing;
+  unsigned ignore_discard;
+  unsigned no_discard_passdown;
+  unsigned error_if_no_space;
+} _thin_pool_emit_segment_line_seg; /* Four unsigned fields, all tested below.  */
+void dm_snprintf(); /* Declared without a prototype; called with one int.  */
+void _emit_segment()
+{
+  int features = /* Sum of five 0/1 terms, i.e. an odd number of addends.  */
+      (_thin_pool_emit_segment_line_seg.error_if_no_space ? 1 : 0) +
+      (read_only ? 1 : 0) + /* The only term not read from the struct.  */
+      (_thin_pool_emit_segment_line_seg.ignore_discard ? 1 : 0) +
+      (_thin_pool_emit_segment_line_seg.no_discard_passdown ? 1 : 0) +
+      (_thin_pool_emit_segment_line_seg.skip_block_zeroing ? 1 : 0);
+  dm_snprintf(features);
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 324400db19e..527b06c9f9c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3288,15 +3288,6 @@  vect_build_slp_instance (vec_info *vinfo,
 			 "  %G", scalar_stmts[i]->stmt);
     }
 
-  /* When a BB reduction doesn't have an even number of lanes
-     strip it down, treating the remaining lane as scalar.
-     ???  Selecting the optimal set of lanes to vectorize would be nice
-     but SLP build for all lanes will fail quickly because we think
-     we're going to need unrolling.  */
-  if (kind == slp_inst_kind_bb_reduc
-      && (scalar_stmts.length () & 1))
-    remain.safe_insert (0, gimple_get_lhs (scalar_stmts.pop ()->stmt));
-
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
@@ -7549,6 +7540,7 @@  vect_slp_check_for_roots (bb_vec_info bb_vinfo)
 	      /* ???  For now do not allow mixing ops or externs/constants.  */
 	      bool invalid = false;
 	      unsigned remain_cnt = 0;
+	      unsigned last_idx = 0;
 	      for (unsigned i = 0; i < chain.length (); ++i)
 		{
 		  if (chain[i].code != code)
@@ -7563,7 +7555,13 @@  vect_slp_check_for_roots (bb_vec_info bb_vinfo)
 						      (chain[i].op)->stmt)
 			  != chain[i].op))
 		    remain_cnt++;
+		  else
+		    last_idx = i;
 		}
+	      /* Make sure to have an even number of lanes as we later do
+		 all-or-nothing discovery, not trying to split further.  */
+	      if ((chain.length () - remain_cnt) & 1)
+		remain_cnt++;
 	      if (!invalid && chain.length () - remain_cnt > 1)
 		{
 		  vec<stmt_vec_info> stmts;
@@ -7576,7 +7574,9 @@  vect_slp_check_for_roots (bb_vec_info bb_vinfo)
 		      stmt_vec_info stmt_info;
 		      if (chain[i].dt == vect_internal_def
 			  && ((stmt_info = bb_vinfo->lookup_def (chain[i].op)),
-			      gimple_get_lhs (stmt_info->stmt) == chain[i].op))
+			      gimple_get_lhs (stmt_info->stmt) == chain[i].op)
+			  && (i != last_idx
+			      || (stmts.length () & 1)))
 			stmts.quick_push (stmt_info);
 		      else
 			remain.quick_push (chain[i].op);