diff mbox series

middle-end: delay updating of dominators until later during vectorization. [PR114081]

Message ID patch-18326-tamar@arm.com
State New
Headers show
Series middle-end: delay updating of dominators until later during vectorization. [PR114081] | expand

Commit Message

Tamar Christina Feb. 25, 2024, 3:36 p.m. UTC
Hi All,

The testcase shows an interesting case where we have multiple loops sharing a
live value and have an early exit that go to the same location.  The additional
complication is that on x86_64 with -mavx we seem to also do prologue peeling
on the loops.

We correctly identify which BB we need their dominators updated for, but we do
so too early.

Instead of adding more dominator update we can solve this by for the cases with
multiple exits not to verify dominators at the end of peeling if peeling for
vectorization.

We can then perform the final dominator updates just before vectorization when
all loop transformations are done.

This also means we reduce the number of dominator updates needed by at least
50% and fixes the ICE.

Bootstrapped Regtested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/114081
	PR tree-optimization/113290
	* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
	Skip dominator update when multiple exit.
	(vect_do_peeling): Remove multiple exit dominator update.
	* tree-vect-loop.cc (vect_transform_loop): Update dominators when
	multiple exits.
	* tree-vectorizer.h (LOOP_VINFO_DOMS_NEED_UPDATE,
			     dominators_needing_update): New.

gcc/testsuite/ChangeLog:

	PR tree-optimization/114081
	PR tree-optimization/113290
	* gcc.dg/vect/vect-early-break_120-pr114081.c: New test.
	* gcc.dg/vect/vect-early-break_121-pr114081.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
new file mode 100644
index 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e41730fd2216f0ec8061376




--
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
new file mode 100644
index 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e41730fd2216f0ec8061376
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+typedef struct filter_list_entry {
+  const char *name;
+  int id;
+  void (*function)();
+} filter_list_entry;
+
+static const filter_list_entry filter_list[9] = {0};
+
+void php_zval_filter(int filter, int id1) {
+  filter_list_entry filter_func;
+
+  int size = 9;
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == filter) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+
+#pragma GCC novector
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == 0x0204) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+done:
+  if (!filter_func.id)
+    filter_func.function();
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
new file mode 100644
index 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31dd1c741f9e36738515
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+typedef struct filter_list_entry {
+  const char *name;
+  int id;
+  void (*function)();
+} filter_list_entry;
+
+static const filter_list_entry filter_list[9] = {0};
+
+void php_zval_filter(int filter, int id1) {
+  filter_list_entry filter_func;
+
+  int size = 9;
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == filter) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == 0x0204) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+done:
+  if (!filter_func.id)
+    filter_func.function();
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e489d306f81e090d0 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 	  doms.safe_push (e->dest);
 	}
 
-      iterate_fix_dominators (CDI_DOMINATORS, doms, false);
       if (updated_doms)
 	updated_doms->safe_splice (doms);
     }
@@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
   free (new_bbs);
   free (bbs);
 
-  checking_verify_dominators (CDI_DOMINATORS);
+  /* If we're peeling for vectorization then delay verifying dominators.  */
+  if (!flow_loops || !multiple_exits_p)
+    checking_verify_dominators (CDI_DOMINATORS);
 
   return new_loop;
 }
@@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
       edge epilog_e = vect_epilogues ? e : scalar_e;
       edge new_epilog_e = NULL;
-      auto_vec<basic_block> doms;
+      auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo);
       epilog
 	= slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
 						  &new_epilog_e, true, &doms);
@@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 	  scale_loop_profile (epilog, prob_epilog, -1);
 	}
 
-      /* Recalculate the dominators after adding the guard edge.  */
-      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
-	iterate_fix_dominators (CDI_DOMINATORS, doms, false);
-
       /* When we do not have a loop-around edge to the epilog we know
 	 the vector loop covered at least VF scalar iterations unless
 	 we have early breaks.
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaaeeffeae1fb900f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
   /* Handle any code motion that we need to for early-break vectorization after
      we've done peeling but just before we start vectorizing.  */
   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
-    move_early_exit_stmts (loop_vinfo);
+    {
+      /* Recalculate the dominators.  */
+      iterate_fix_dominators (CDI_DOMINATORS,
+			      LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo), false);
+      move_early_exit_stmts (loop_vinfo);
+    }
 
   /* Schedule the SLP instances first, then handle loop vectorization
      below.  */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcbbb110c0380b7d73aa 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -961,6 +961,10 @@ public:
   /* Statements whose VUSES need updating if early break vectorization is to
      happen.  */
   auto_vec<gimple*> early_break_vuses;
+
+  /* Dominators that need to be recalculated that have been deferred until after
+     all peeling.  */
+  auto_vec<basic_block> dominators_needing_update;
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -1021,6 +1025,7 @@ public:
   (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
 #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
 #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
+#define LOOP_VINFO_DOMS_NEED_UPDATE(L)     (L)->dominators_needing_update
 #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
 #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
 #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies

Comments

Richard Biener Feb. 26, 2024, 10:33 a.m. UTC | #1
On Sun, 25 Feb 2024, Tamar Christina wrote:

> Hi All,
> 
> The testcase shows an interesting case where we have multiple loops sharing a
> live value and have an early exit that go to the same location.  The additional
> complication is that on x86_64 with -mavx we seem to also do prologue peeling
> on the loops.
> 
> We correctly identify which BB we need their dominators updated for, but we do
> so too early.
> 
> Instead of adding more dominator update we can solve this by for the cases with
> multiple exits not to verify dominators at the end of peeling if peeling for
> vectorization.
> 
> We can then perform the final dominator updates just before vectorization when
> all loop transformations are done.

What's the actual CFG transform that happens between the old and the new
place?  I see a possible edge splitting but where is the one that makes
this patch work?

> This also means we reduce the number of dominator updates needed by at least
> 50% and fixes the ICE.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and
> x86_64-pc-linux-gnu no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/114081
> 	PR tree-optimization/113290
> 	* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
> 	Skip dominator update when multiple exit.
> 	(vect_do_peeling): Remove multiple exit dominator update.
> 	* tree-vect-loop.cc (vect_transform_loop): Update dominators when
> 	multiple exits.
> 	* tree-vectorizer.h (LOOP_VINFO_DOMS_NEED_UPDATE,
> 			     dominators_needing_update): New.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/114081
> 	PR tree-optimization/113290
> 	* gcc.dg/vect/vect-early-break_120-pr114081.c: New test.
> 	* gcc.dg/vect/vect-early-break_121-pr114081.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e41730fd2216f0ec8061376
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-O3" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +typedef struct filter_list_entry {
> +  const char *name;
> +  int id;
> +  void (*function)();
> +} filter_list_entry;
> +
> +static const filter_list_entry filter_list[9] = {0};
> +
> +void php_zval_filter(int filter, int id1) {
> +  filter_list_entry filter_func;
> +
> +  int size = 9;
> +  for (int i = 0; i < size; ++i) {
> +    if (filter_list[i].id == filter) {
> +      filter_func = filter_list[i];
> +      goto done;
> +    }
> +  }
> +
> +#pragma GCC novector
> +  for (int i = 0; i < size; ++i) {
> +    if (filter_list[i].id == 0x0204) {
> +      filter_func = filter_list[i];
> +      goto done;
> +    }
> +  }
> +done:
> +  if (!filter_func.id)
> +    filter_func.function();
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31dd1c741f9e36738515
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-O3" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +typedef struct filter_list_entry {
> +  const char *name;
> +  int id;
> +  void (*function)();
> +} filter_list_entry;
> +
> +static const filter_list_entry filter_list[9] = {0};
> +
> +void php_zval_filter(int filter, int id1) {
> +  filter_list_entry filter_func;
> +
> +  int size = 9;
> +  for (int i = 0; i < size; ++i) {
> +    if (filter_list[i].id == filter) {
> +      filter_func = filter_list[i];
> +      goto done;
> +    }
> +  }
> +
> +  for (int i = 0; i < size; ++i) {
> +    if (filter_list[i].id == 0x0204) {
> +      filter_func = filter_list[i];
> +      goto done;
> +    }
> +  }
> +done:
> +  if (!filter_func.id)
> +    filter_func.function();
> +}
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e489d306f81e090d0 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
>  	  doms.safe_push (e->dest);
>  	}
>  
> -      iterate_fix_dominators (CDI_DOMINATORS, doms, false);
>        if (updated_doms)
>  	updated_doms->safe_splice (doms);
>      }
> @@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
>    free (new_bbs);
>    free (bbs);
>  
> -  checking_verify_dominators (CDI_DOMINATORS);
> +  /* If we're peeling for vectorization then delay verifying dominators.  */
> +  if (!flow_loops || !multiple_exits_p)
> +    checking_verify_dominators (CDI_DOMINATORS);
>  
>    return new_loop;
>  }
> @@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
>        epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
>        edge epilog_e = vect_epilogues ? e : scalar_e;
>        edge new_epilog_e = NULL;
> -      auto_vec<basic_block> doms;
> +      auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo);
>        epilog
>  	= slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
>  						  &new_epilog_e, true, &doms);
> @@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
>  	  scale_loop_profile (epilog, prob_epilog, -1);
>  	}
>  
> -      /* Recalculate the dominators after adding the guard edge.  */
> -      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> -	iterate_fix_dominators (CDI_DOMINATORS, doms, false);
> -
>        /* When we do not have a loop-around edge to the epilog we know
>  	 the vector loop covered at least VF scalar iterations unless
>  	 we have early breaks.
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaaeeffeae1fb900f 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
>    /* Handle any code motion that we need to for early-break vectorization after
>       we've done peeling but just before we start vectorizing.  */
>    if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> -    move_early_exit_stmts (loop_vinfo);
> +    {
> +      /* Recalculate the dominators.  */
> +      iterate_fix_dominators (CDI_DOMINATORS,
> +			      LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo), false);
> +      move_early_exit_stmts (loop_vinfo);
> +    }
>  
>    /* Schedule the SLP instances first, then handle loop vectorization
>       below.  */
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcbbb110c0380b7d73aa 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -961,6 +961,10 @@ public:
>    /* Statements whose VUSES need updating if early break vectorization is to
>       happen.  */
>    auto_vec<gimple*> early_break_vuses;
> +
> +  /* Dominators that need to be recalculated that have been deferred until after
> +     all peeling.  */
> +  auto_vec<basic_block> dominators_needing_update;
>  } *loop_vec_info;
>  
>  /* Access Functions.  */
> @@ -1021,6 +1025,7 @@ public:
>    (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
>  #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
>  #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
> +#define LOOP_VINFO_DOMS_NEED_UPDATE(L)     (L)->dominators_needing_update
>  #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
>  #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
>  #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
> 
> 
> 
> 
>
Tamar Christina Feb. 26, 2024, 12:10 p.m. UTC | #2
> > The testcase shows an interesting case where we have multiple loops sharing a
> > live value and have an early exit that go to the same location.  The additional
> > complication is that on x86_64 with -mavx we seem to also do prologue peeling
> > on the loops.
> >
> > We correctly identify which BB we need their dominators updated for, but we do
> > so too early.
> >
> > Instead of adding more dominator update we can solve this by for the cases with
> > multiple exits not to verify dominators at the end of peeling if peeling for
> > vectorization.
> >
> > We can then perform the final dominator updates just before vectorization when
> > all loop transformations are done.
> 
> What's the actual CFG transform that happens between the old and the new
> place?  I see a possible edge splitting but where is the one that makes
> this patch work?

It's not one but two.
1. loop 1 is prologue peeled. This ICEs because the dominator update is only happening
    for epilogue peeling.  Note that loop 1 here dominates 21 and the ICE is:

ice.c: In function 'void php_zval_filter(int, int)':
ice.c:7:6: error: dominator of 14 should be 21, not 3
    7 | void php_zval_filter(int filter, int id1) {
      |      ^~~~~~~~~~~~~~~
ice.c:7:6: error: dominator of 10 should be 21, not 3
during GIMPLE pass: vect
dump file: a-ice.c.179t.vect

This can be simply fixed by just moving the dom update code down:

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index a5202f32e27..e88948370c6 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1845,13 +1845,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
         to the original function exit we recorded.  Other exits are already
         correct.  */
       if (multiple_exits_p)
-       {
-         update_loop = new_loop;
-         doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
-         for (unsigned i = 0; i < doms.length (); ++i)
-           if (flow_bb_inside_loop_p (loop, doms[i]))
-             doms.unordered_remove (i);
-       }
+       update_loop = new_loop;
     }
   else /* Add the copy at entry.  */
     {
@@ -1906,6 +1900,11 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,

   if (multiple_exits_p)
     {
+      doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
+      for (unsigned i = 0; i < doms.length (); ++i)
+       if (flow_bb_inside_loop_p (loop, doms[i]))
+         doms.unordered_remove (i);
+
       for (edge e : get_loop_exit_edges (update_loop))
        {
          edge ex;

with that done, the next ICE comes along. Loop 1 is peeled again, but this time for epilogue.
however loop 1 no longer dominates the exits as the prologue peeled loop does.

So we don't find anything to update and ice with the second ICE:

ice.c: In function 'void php_zval_filter(int, int)':
ice.c:7:6: error: dominator of 14 should be 2, not 21
    7 | void php_zval_filter(int filter, int id1) {
      |      ^~~~~~~~~~~~~~~
ice.c:7:6: error: dominator of 10 should be 2, not 21
during GIMPLE pass: vect
dump file: a-ice.c.179t.vect

because the prologue loop no longer dominates them due to the skip edge.  This is why delaying
works because we know we have to update the dominators of 14 and 10, but to what we don't know
yet.

Tamar

> 
> > This also means we reduce the number of dominator updates needed by at least
> > 50% and fixes the ICE.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and
> > x86_64-pc-linux-gnu no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	PR tree-optimization/114081
> > 	PR tree-optimization/113290
> > 	* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
> > 	Skip dominator update when multiple exit.
> > 	(vect_do_peeling): Remove multiple exit dominator update.
> > 	* tree-vect-loop.cc (vect_transform_loop): Update dominators when
> > 	multiple exits.
> > 	* tree-vectorizer.h (LOOP_VINFO_DOMS_NEED_UPDATE,
> > 			     dominators_needing_update): New.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 	PR tree-optimization/114081
> > 	PR tree-optimization/113290
> > 	* gcc.dg/vect/vect-early-break_120-pr114081.c: New test.
> > 	* gcc.dg/vect/vect-early-break_121-pr114081.c: New test.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> > new file mode 100644
> > index
> 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e4173
> 0fd2216f0ec8061376
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> > @@ -0,0 +1,38 @@
> > +/* { dg-do compile } */
> > +/* { dg-add-options vect_early_break } */
> > +/* { dg-require-effective-target vect_early_break } */
> > +/* { dg-require-effective-target vect_int } */
> > +/* { dg-additional-options "-O3" } */
> > +
> > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> > +
> > +typedef struct filter_list_entry {
> > +  const char *name;
> > +  int id;
> > +  void (*function)();
> > +} filter_list_entry;
> > +
> > +static const filter_list_entry filter_list[9] = {0};
> > +
> > +void php_zval_filter(int filter, int id1) {
> > +  filter_list_entry filter_func;
> > +
> > +  int size = 9;
> > +  for (int i = 0; i < size; ++i) {
> > +    if (filter_list[i].id == filter) {
> > +      filter_func = filter_list[i];
> > +      goto done;
> > +    }
> > +  }
> > +
> > +#pragma GCC novector
> > +  for (int i = 0; i < size; ++i) {
> > +    if (filter_list[i].id == 0x0204) {
> > +      filter_func = filter_list[i];
> > +      goto done;
> > +    }
> > +  }
> > +done:
> > +  if (!filter_func.id)
> > +    filter_func.function();
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> > new file mode 100644
> > index
> 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31
> dd1c741f9e36738515
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> > @@ -0,0 +1,37 @@
> > +/* { dg-do compile } */
> > +/* { dg-add-options vect_early_break } */
> > +/* { dg-require-effective-target vect_early_break } */
> > +/* { dg-require-effective-target vect_int } */
> > +/* { dg-additional-options "-O3" } */
> > +
> > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> > +
> > +typedef struct filter_list_entry {
> > +  const char *name;
> > +  int id;
> > +  void (*function)();
> > +} filter_list_entry;
> > +
> > +static const filter_list_entry filter_list[9] = {0};
> > +
> > +void php_zval_filter(int filter, int id1) {
> > +  filter_list_entry filter_func;
> > +
> > +  int size = 9;
> > +  for (int i = 0; i < size; ++i) {
> > +    if (filter_list[i].id == filter) {
> > +      filter_func = filter_list[i];
> > +      goto done;
> > +    }
> > +  }
> > +
> > +  for (int i = 0; i < size; ++i) {
> > +    if (filter_list[i].id == 0x0204) {
> > +      filter_func = filter_list[i];
> > +      goto done;
> > +    }
> > +  }
> > +done:
> > +  if (!filter_func.id)
> > +    filter_func.function();
> > +}
> > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> > index
> 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e
> 489d306f81e090d0 100644
> > --- a/gcc/tree-vect-loop-manip.cc
> > +++ b/gcc/tree-vect-loop-manip.cc
> > @@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop
> *loop, edge loop_exit,
> >  	  doms.safe_push (e->dest);
> >  	}
> >
> > -      iterate_fix_dominators (CDI_DOMINATORS, doms, false);
> >        if (updated_doms)
> >  	updated_doms->safe_splice (doms);
> >      }
> > @@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop
> *loop, edge loop_exit,
> >    free (new_bbs);
> >    free (bbs);
> >
> > -  checking_verify_dominators (CDI_DOMINATORS);
> > +  /* If we're peeling for vectorization then delay verifying dominators.  */
> > +  if (!flow_loops || !multiple_exits_p)
> > +    checking_verify_dominators (CDI_DOMINATORS);
> >
> >    return new_loop;
> >  }
> > @@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree
> niters, tree nitersm1,
> >        epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
> >        edge epilog_e = vect_epilogues ? e : scalar_e;
> >        edge new_epilog_e = NULL;
> > -      auto_vec<basic_block> doms;
> > +      auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE
> (loop_vinfo);
> >        epilog
> >  	= slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
> >  						  &new_epilog_e, true, &doms);
> > @@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree
> niters, tree nitersm1,
> >  	  scale_loop_profile (epilog, prob_epilog, -1);
> >  	}
> >
> > -      /* Recalculate the dominators after adding the guard edge.  */
> > -      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> > -	iterate_fix_dominators (CDI_DOMINATORS, doms, false);
> > -
> >        /* When we do not have a loop-around edge to the epilog we know
> >  	 the vector loop covered at least VF scalar iterations unless
> >  	 we have early breaks.
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index
> 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaae
> effeae1fb900f 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo,
> gimple *loop_vectorized_call)
> >    /* Handle any code motion that we need to for early-break vectorization after
> >       we've done peeling but just before we start vectorizing.  */
> >    if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> > -    move_early_exit_stmts (loop_vinfo);
> > +    {
> > +      /* Recalculate the dominators.  */
> > +      iterate_fix_dominators (CDI_DOMINATORS,
> > +			      LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo),
> false);
> > +      move_early_exit_stmts (loop_vinfo);
> > +    }
> >
> >    /* Schedule the SLP instances first, then handle loop vectorization
> >       below.  */
> > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > index
> db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcb
> bb110c0380b7d73aa 100644
> > --- a/gcc/tree-vectorizer.h
> > +++ b/gcc/tree-vectorizer.h
> > @@ -961,6 +961,10 @@ public:
> >    /* Statements whose VUSES need updating if early break vectorization is to
> >       happen.  */
> >    auto_vec<gimple*> early_break_vuses;
> > +
> > +  /* Dominators that need to be recalculated that have been deferred until after
> > +     all peeling.  */
> > +  auto_vec<basic_block> dominators_needing_update;
> >  } *loop_vec_info;
> >
> >  /* Access Functions.  */
> > @@ -1021,6 +1025,7 @@ public:
> >    (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
> >  #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
> >  #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
> > +#define LOOP_VINFO_DOMS_NEED_UPDATE(L)     (L)-
> >dominators_needing_update
> >  #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
> >  #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
> >  #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
> >
> >
> >
> >
> >
> 
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
Richard Biener Feb. 26, 2024, 1:34 p.m. UTC | #3
On Mon, 26 Feb 2024, Tamar Christina wrote:

> > > The testcase shows an interesting case where we have multiple loops sharing a
> > > live value and have an early exit that go to the same location.  The additional
> > > complication is that on x86_64 with -mavx we seem to also do prologue peeling
> > > on the loops.
> > >
> > > We correctly identify which BB we need their dominators updated for, but we do
> > > so too early.
> > >
> > > Instead of adding more dominator update we can solve this by for the cases with
> > > multiple exits not to verify dominators at the end of peeling if peeling for
> > > vectorization.
> > >
> > > We can then perform the final dominator updates just before vectorization when
> > > all loop transformations are done.
> > 
> > What's the actual CFG transform that happens between the old and the new
> > place?  I see a possible edge splitting but where is the one that makes
> > this patch work?
> 
> It's not one but two.
> 1. loop 1 is prologue peeled. This ICEs because the dominator update is only happening
>     for epilogue peeling.  Note that loop 1 here dominates 21 and the ICE is:
> 
> ice.c: In function 'void php_zval_filter(int, int)':
> ice.c:7:6: error: dominator of 14 should be 21, not 3
>     7 | void php_zval_filter(int filter, int id1) {
>       |      ^~~~~~~~~~~~~~~
> ice.c:7:6: error: dominator of 10 should be 21, not 3
> during GIMPLE pass: vect
> dump file: a-ice.c.179t.vect
> 
> This can be simply fixed by just moving the dom update code down:
> 
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index a5202f32e27..e88948370c6 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -1845,13 +1845,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
>          to the original function exit we recorded.  Other exits are already
>          correct.  */
>        if (multiple_exits_p)
> -       {
> -         update_loop = new_loop;
> -         doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
> -         for (unsigned i = 0; i < doms.length (); ++i)
> -           if (flow_bb_inside_loop_p (loop, doms[i]))
> -             doms.unordered_remove (i);
> -       }
> +       update_loop = new_loop;
>      }
>    else /* Add the copy at entry.  */
>      {
> @@ -1906,6 +1900,11 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
> 
>    if (multiple_exits_p)
>      {
> +      doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
> +      for (unsigned i = 0; i < doms.length (); ++i)
> +       if (flow_bb_inside_loop_p (loop, doms[i]))
> +         doms.unordered_remove (i);
> +
>        for (edge e : get_loop_exit_edges (update_loop))
>         {
>           edge ex;
> 
> with that done, the next ICE comes along. Loop 1 is peeled again, but this time for epilogue.
> however loop 1 no longer dominates the exits as the prologue peeled loop does.
> 
> So we don't find anything to update and ice with the second ICE:
> 
> ice.c: In function 'void php_zval_filter(int, int)':
> ice.c:7:6: error: dominator of 14 should be 2, not 21
>     7 | void php_zval_filter(int filter, int id1) {
>       |      ^~~~~~~~~~~~~~~
> ice.c:7:6: error: dominator of 10 should be 2, not 21
> during GIMPLE pass: vect
> dump file: a-ice.c.179t.vect

OK, that seems to be because the condition around the prologue doesn't
update everything for the skip_prolog case (slpeel_add_loop_guard
fails to update for multi-exits).

A more "general" dominance update in the face of multiple exits might
be something like

          for (edge alt_e : loop_exits)
            {
              if (alt_e == loop_exit)
                continue;
              basic_block old_dom
                = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
              if (flow_bb_inside_loop_p (loop, old_dom))
                {
                  auto_vec<basic_block, 8> queue;
                  for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
                       son; son = next_dom_son (CDI_DOMINATORS, son))
                    if (!flow_bb_inside_loop_p (loop, son))
                      queue.safe_push (son);
                  for (auto son : queue)
                    set_immediate_dominator (CDI_DOMINATORS,
                                             son, get_bb_copy (old_dom));
                }
            }

basically we know the new most dominating place and we know where we
wire in new edges (the exits, or in the new guard the skip point
destination).  From there we can look at the old immediate dominator
which we know we have to adjust - but with multiple exits there might
be other blocks also dominated by this block and those we need to adjust
as well.  The above can be possibly split out to a helper getting
old_dom and the "new dom" (the get_bb_copy one) and a predicate
that might generally reduce to !dominated_by_p the "exit dest".

I'm not sure your way of identifying the "wrong" dominance blocks
using

      for (edge e : get_loop_exit_edges (update_loop))
        {
          edge ex;
          edge_iterator ei;
          FOR_EACH_EDGE (ex, ei, e->dest->succs)
            {
              /* Find the first non-fallthrough block as fall-throughs 
can't
                 dominate other blocks.  */
              if (single_succ_p (ex->dest))
                {
                  doms.safe_push (ex->dest);
                  ex = single_succ_edge (ex->dest);
                }
              doms.safe_push (ex->dest);
            }
          doms.safe_push (e->dest);

is sound, but you might pick them up accidentially?

I'm giving the above a more thorough try ...

Richard.

> because the prologue loop no longer dominates them due to the skip edge.  This is why delaying
> works because we know we have to update the dominators of 14 and 10, but to what we don't know
> yet.
> 
> Tamar
> 
> > 
> > > This also means we reduce the number of dominator updates needed by at least
> > > 50% and fixes the ICE.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and
> > > x86_64-pc-linux-gnu no issues.
> > >
> > > Ok for master?
> > >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > > 	PR tree-optimization/114081
> > > 	PR tree-optimization/113290
> > > 	* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
> > > 	Skip dominator update when multiple exit.
> > > 	(vect_do_peeling): Remove multiple exit dominator update.
> > > 	* tree-vect-loop.cc (vect_transform_loop): Update dominators when
> > > 	multiple exits.
> > > 	* tree-vectorizer.h (LOOP_VINFO_DOMS_NEED_UPDATE,
> > > 			     dominators_needing_update): New.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > 	PR tree-optimization/114081
> > > 	PR tree-optimization/113290
> > > 	* gcc.dg/vect/vect-early-break_120-pr114081.c: New test.
> > > 	* gcc.dg/vect/vect-early-break_121-pr114081.c: New test.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> > b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> > > new file mode 100644
> > > index
> > 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e4173
> > 0fd2216f0ec8061376
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> > > @@ -0,0 +1,38 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-add-options vect_early_break } */
> > > +/* { dg-require-effective-target vect_early_break } */
> > > +/* { dg-require-effective-target vect_int } */
> > > +/* { dg-additional-options "-O3" } */
> > > +
> > > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> > > +
> > > +typedef struct filter_list_entry {
> > > +  const char *name;
> > > +  int id;
> > > +  void (*function)();
> > > +} filter_list_entry;
> > > +
> > > +static const filter_list_entry filter_list[9] = {0};
> > > +
> > > +void php_zval_filter(int filter, int id1) {
> > > +  filter_list_entry filter_func;
> > > +
> > > +  int size = 9;
> > > +  for (int i = 0; i < size; ++i) {
> > > +    if (filter_list[i].id == filter) {
> > > +      filter_func = filter_list[i];
> > > +      goto done;
> > > +    }
> > > +  }
> > > +
> > > +#pragma GCC novector
> > > +  for (int i = 0; i < size; ++i) {
> > > +    if (filter_list[i].id == 0x0204) {
> > > +      filter_func = filter_list[i];
> > > +      goto done;
> > > +    }
> > > +  }
> > > +done:
> > > +  if (!filter_func.id)
> > > +    filter_func.function();
> > > +}
> > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> > b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> > > new file mode 100644
> > > index
> > 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31
> > dd1c741f9e36738515
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> > > @@ -0,0 +1,37 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-add-options vect_early_break } */
> > > +/* { dg-require-effective-target vect_early_break } */
> > > +/* { dg-require-effective-target vect_int } */
> > > +/* { dg-additional-options "-O3" } */
> > > +
> > > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> > > +
> > > +typedef struct filter_list_entry {
> > > +  const char *name;
> > > +  int id;
> > > +  void (*function)();
> > > +} filter_list_entry;
> > > +
> > > +static const filter_list_entry filter_list[9] = {0};
> > > +
> > > +void php_zval_filter(int filter, int id1) {
> > > +  filter_list_entry filter_func;
> > > +
> > > +  int size = 9;
> > > +  for (int i = 0; i < size; ++i) {
> > > +    if (filter_list[i].id == filter) {
> > > +      filter_func = filter_list[i];
> > > +      goto done;
> > > +    }
> > > +  }
> > > +
> > > +  for (int i = 0; i < size; ++i) {
> > > +    if (filter_list[i].id == 0x0204) {
> > > +      filter_func = filter_list[i];
> > > +      goto done;
> > > +    }
> > > +  }
> > > +done:
> > > +  if (!filter_func.id)
> > > +    filter_func.function();
> > > +}
> > > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> > > index
> > 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e
> > 489d306f81e090d0 100644
> > > --- a/gcc/tree-vect-loop-manip.cc
> > > +++ b/gcc/tree-vect-loop-manip.cc
> > > @@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop
> > *loop, edge loop_exit,
> > >  	  doms.safe_push (e->dest);
> > >  	}
> > >
> > > -      iterate_fix_dominators (CDI_DOMINATORS, doms, false);
> > >        if (updated_doms)
> > >  	updated_doms->safe_splice (doms);
> > >      }
> > > @@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop
> > *loop, edge loop_exit,
> > >    free (new_bbs);
> > >    free (bbs);
> > >
> > > -  checking_verify_dominators (CDI_DOMINATORS);
> > > +  /* If we're peeling for vectorization then delay verifying dominators.  */
> > > +  if (!flow_loops || !multiple_exits_p)
> > > +    checking_verify_dominators (CDI_DOMINATORS);
> > >
> > >    return new_loop;
> > >  }
> > > @@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree
> > niters, tree nitersm1,
> > >        epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
> > >        edge epilog_e = vect_epilogues ? e : scalar_e;
> > >        edge new_epilog_e = NULL;
> > > -      auto_vec<basic_block> doms;
> > > +      auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE
> > (loop_vinfo);
> > >        epilog
> > >  	= slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
> > >  						  &new_epilog_e, true, &doms);
> > > @@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree
> > niters, tree nitersm1,
> > >  	  scale_loop_profile (epilog, prob_epilog, -1);
> > >  	}
> > >
> > > -      /* Recalculate the dominators after adding the guard edge.  */
> > > -      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> > > -	iterate_fix_dominators (CDI_DOMINATORS, doms, false);
> > > -
> > >        /* When we do not have a loop-around edge to the epilog we know
> > >  	 the vector loop covered at least VF scalar iterations unless
> > >  	 we have early breaks.
> > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > > index
> > 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaae
> > effeae1fb900f 100644
> > > --- a/gcc/tree-vect-loop.cc
> > > +++ b/gcc/tree-vect-loop.cc
> > > @@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo,
> > gimple *loop_vectorized_call)
> > >    /* Handle any code motion that we need to for early-break vectorization after
> > >       we've done peeling but just before we start vectorizing.  */
> > >    if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> > > -    move_early_exit_stmts (loop_vinfo);
> > > +    {
> > > +      /* Recalculate the dominators.  */
> > > +      iterate_fix_dominators (CDI_DOMINATORS,
> > > +			      LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo),
> > false);
> > > +      move_early_exit_stmts (loop_vinfo);
> > > +    }
> > >
> > >    /* Schedule the SLP instances first, then handle loop vectorization
> > >       below.  */
> > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > > index
> > db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcb
> > bb110c0380b7d73aa 100644
> > > --- a/gcc/tree-vectorizer.h
> > > +++ b/gcc/tree-vectorizer.h
> > > @@ -961,6 +961,10 @@ public:
> > >    /* Statements whose VUSES need updating if early break vectorization is to
> > >       happen.  */
> > >    auto_vec<gimple*> early_break_vuses;
> > > +
> > > +  /* Dominators that need to be recalculated that have been deferred until after
> > > +     all peeling.  */
> > > +  auto_vec<basic_block> dominators_needing_update;
> > >  } *loop_vec_info;
> > >
> > >  /* Access Functions.  */
> > > @@ -1021,6 +1025,7 @@ public:
> > >    (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
> > >  #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
> > >  #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
> > > +#define LOOP_VINFO_DOMS_NEED_UPDATE(L)     (L)-
> > >dominators_needing_update
> > >  #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
> > >  #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
> > >  #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
> > >
> > >
> > >
> > >
> > >
> > 
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
>
diff mbox series

Patch

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
@@ -0,0 +1,38 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+typedef struct filter_list_entry {
+  const char *name;
+  int id;
+  void (*function)();
+} filter_list_entry;
+
+static const filter_list_entry filter_list[9] = {0};
+
+void php_zval_filter(int filter, int id1) {
+  filter_list_entry filter_func;
+
+  int size = 9;
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == filter) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+
+#pragma GCC novector
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == 0x0204) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+done:
+  if (!filter_func.id)
+    filter_func.function();
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
new file mode 100644
index 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31dd1c741f9e36738515
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
@@ -0,0 +1,37 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+typedef struct filter_list_entry {
+  const char *name;
+  int id;
+  void (*function)();
+} filter_list_entry;
+
+static const filter_list_entry filter_list[9] = {0};
+
+void php_zval_filter(int filter, int id1) {
+  filter_list_entry filter_func;
+
+  int size = 9;
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == filter) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == 0x0204) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+done:
+  if (!filter_func.id)
+    filter_func.function();
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e489d306f81e090d0 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1917,7 +1917,6 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 	  doms.safe_push (e->dest);
 	}
 
-      iterate_fix_dominators (CDI_DOMINATORS, doms, false);
       if (updated_doms)
 	updated_doms->safe_splice (doms);
     }
@@ -1925,7 +1924,9 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
   free (new_bbs);
   free (bbs);
 
-  checking_verify_dominators (CDI_DOMINATORS);
+  /* If we're peeling for vectorization then delay verifying dominators.  */
+  if (!flow_loops || !multiple_exits_p)
+    checking_verify_dominators (CDI_DOMINATORS);
 
   return new_loop;
 }
@@ -3404,7 +3405,7 @@  vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
       edge epilog_e = vect_epilogues ? e : scalar_e;
       edge new_epilog_e = NULL;
-      auto_vec<basic_block> doms;
+      auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo);
       epilog
 	= slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
 						  &new_epilog_e, true, &doms);
@@ -3554,10 +3555,6 @@  vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 	  scale_loop_profile (epilog, prob_epilog, -1);
 	}
 
-      /* Recalculate the dominators after adding the guard edge.  */
-      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
-	iterate_fix_dominators (CDI_DOMINATORS, doms, false);
-
       /* When we do not have a loop-around edge to the epilog we know
 	 the vector loop covered at least VF scalar iterations unless
 	 we have early breaks.
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaaeeffeae1fb900f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11987,7 +11987,12 @@  vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
   /* Handle any code motion that we need to for early-break vectorization after
      we've done peeling but just before we start vectorizing.  */
   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
-    move_early_exit_stmts (loop_vinfo);
+    {
+      /* Recalculate the dominators.  */
+      iterate_fix_dominators (CDI_DOMINATORS,
+			      LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo), false);
+      move_early_exit_stmts (loop_vinfo);
+    }
 
   /* Schedule the SLP instances first, then handle loop vectorization
      below.  */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcbbb110c0380b7d73aa 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -961,6 +961,10 @@  public:
   /* Statements whose VUSES need updating if early break vectorization is to
      happen.  */
   auto_vec<gimple*> early_break_vuses;
+
+  /* Dominators that need to be recalculated that have been deferred until after
+     all peeling.  */
+  auto_vec<basic_block> dominators_needing_update;
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -1021,6 +1025,7 @@  public:
   (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
 #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
 #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
+#define LOOP_VINFO_DOMS_NEED_UPDATE(L)     (L)->dominators_needing_update
 #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
 #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
 #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies