Apply TLC to vect_analyze_early_break_dependences

Message ID 20240207095026.73ADB3858297@sourceware.org
State New
Series Apply TLC to vect_analyze_early_break_dependences

Commit Message

Richard Biener Feb. 7, 2024, 9:49 a.m. UTC
There has been some confusion in my understanding of how early breaks
work; the following clarifies some comments and undoes one change that
shouldn't have been necessary.  It also fixes the dependence test
to avoid TBAA (we're moving stores down across loads).
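
As a minimal sketch (an illustration, not part of the patch), this is
the kind of loop the analysis handles.  To vectorize the early break
the store to a[i] is sunk below the loads feeding the exit test for
the whole vector iteration, so a write-before-read pair becomes a
read-before-write pair:

  void
  foo (int *a, int *b, int n)
  {
    for (int i = 0; i < n; i++)
      {
        a[i] = 2 * b[i];  /* Store to be sunk to the exit destination.  */
        if (b[i] == 42)   /* Early break; its load of b[i] then
                             executes before the store.  */
          break;
      }
  }

Since the dependence routines assume the accesses execute in the
order they are passed in, the pair has to be queried in the order it
will have after the motion, which also keeps TBAA out of the game.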

I'm bootstrapping and testing this on x86_64-unknown-linux-gnu.

It so far passed vect.exp testing with SSE4.2 and AVX512.

Richard.

	* tree-vect-data-refs.cc (vect_analyze_early_break_dependences):
	Only check whether reads are in-bounds in places that are not safe.
	Fix dependence check.  Add missing newline.  Clarify comments.
---
 gcc/tree-vect-data-refs.cc | 43 ++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 18 deletions(-)

Patch

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index f79ade9509b..69ba4fb7a82 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -684,9 +684,10 @@  vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
   /* Since we don't support general control flow, the location we'll move the
      side-effects to is always the latch connected exit.  When we support
      general control flow we can do better but for now this is fine.  Move
-     side-effects to the in-loop destination of the last early exit.  For the PEELED
-     case we move the side-effects to the latch block as this is guaranteed to be the
-     last block to be executed when a vector iteration finished.  */
+     side-effects to the in-loop destination of the last early exit.  For the
+     PEELED case we move the side-effects to the latch block as this is
+     guaranteed to be the last block to be executed when a vector iteration
+     finished.  */
   if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
     dest_bb = loop->latch;
   else
@@ -697,9 +698,9 @@  vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
      loads.  */
   basic_block bb = dest_bb;
 
-  /* In the peeled case we need to check all the loads in the loop since to move the
-     the stores we lift the stores over all loads into the latch.  */
-  bool check_deps = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
+  /* We move stores across all loads to the beginning of dest_bb, so
+     the first block processed below doesn't need dependence checking.  */
+  bool check_deps = false;
 
   do
     {
@@ -711,8 +712,7 @@  vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
 	{
 	  gimple *stmt = gsi_stmt (gsi);
 	  gsi_prev (&gsi);
-	  if (!gimple_has_ops (stmt)
-	      || is_gimple_debug (stmt))
+	  if (is_gimple_debug (stmt))
 	    continue;
 
 	  stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
@@ -720,18 +720,25 @@  vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
 	  if (!dr_ref)
 	    continue;
 
+	  /* We know everything below dest_bb is safe since we know we
+	     had a full vector iteration when reaching it.  Either by
+	     the loop entry / IV exit test being last or because this
+	     is the loop latch itself.  */
+	  if (!check_deps)
+	    continue;
+
 	  /* Check if vector accesses to the object will be within bounds.
 	     must be a constant or assume loop will be versioned or niters
-	     bounded by VF so accesses are within range.  We only need to check the
-	     reads since writes are moved to a safe place where if we get there we
-	     know they are safe to perform.  */
+	     bounded by VF so accesses are within range.  We only need to check
+	     the reads since writes are moved to a safe place where if we get
+	     there we know they are safe to perform.  */
 	  if (DR_IS_READ (dr_ref)
 	      && !ref_within_array_bound (stmt, DR_REF (dr_ref)))
 	    {
 	      if (dump_enabled_p ())
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 				 "early breaks not supported: vectorization "
-				 "would %s beyond size of obj.",
+				 "would %s beyond size of obj.\n",
 				 DR_IS_READ (dr_ref) ? "read" : "write");
 	      return opt_result::failure_at (stmt,
 				 "can't safely apply code motion to "
@@ -739,9 +746,6 @@  vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
 				 "the early exit.\n", stmt);
 	    }
 
-	  if (!check_deps)
-	    continue;
-
 	  if (DR_IS_READ (dr_ref))
 	    bases.safe_push (dr_ref);
 	  else if (DR_IS_WRITE (dr_ref))
@@ -768,7 +772,11 @@  vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
 		 the store.  */
 
 	      for (auto dr_read : bases)
-		if (dr_may_alias_p (dr_ref, dr_read, loop_nest))
+		/* Note we're not passing the DRs in stmt order here
+		   since the DR dependence checking routine does not
+		   envision we're moving stores down.  The read-write
+		   order tricks it to avoid applying TBAA.  */
+		if (dr_may_alias_p (dr_read, dr_ref, loop_nest))
 		  {
 		    if (dump_enabled_p ())
 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
@@ -808,8 +816,7 @@  vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
 	  break;
 	}
 
-      /* For the non-PEELED case we don't want to check the loads in the IV exit block
-	 for dependencies with the stores, but any block preceeding it we do.  */
+      /* All earlier blocks need dependence checking.  */
       check_deps = true;
       bb = single_pred (bb);
     }
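
As an aside on the argument order to dr_may_alias_p: for allocated
storage the effective type is set by the most recent store, so a load
followed by a store of a different type may touch the same bytes (the
store simply re-types the storage), while a store followed by a load
of an incompatible type may not.  TBAA can therefore only be used to
disambiguate the write-before-read order, which is what the swapped
arguments avoid claiming.  A small well-defined example (a sketch
independent of the patch):

  #include <stdlib.h>

  int
  main (void)
  {
    void *p = malloc (sizeof (int) + sizeof (float));
    if (!p)
      return 1;

    *(int *) p = 42;         /* Effective type of the storage is int.  */
    int i = *(int *) p;      /* Read as int: well defined.  */
    *(float *) p = 1.0f;     /* The store re-types the storage.  */
    float f = *(float *) p;  /* Read as float: well defined.  */

    free (p);
    return !(i == 42 && f == 1.0f);
  }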