diff mbox series

[modulo-sched] Fix PR92591

Message ID 31e99da8-0b2b-d19d-aeb2-253e7f57ca80@ispras.ru
State New
Headers show
Series [modulo-sched] Fix PR92591 | expand

Commit Message

Roman Zhuykov Dec. 12, 2019, 2:29 p.m. UTC
Hello.

As pointed out in the PR 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92591#c1, the test can be 
fixed by DFA-checking more adjacent row sequences in the partial 
schedule.
I've found that on powerpc64 gcc.c-torture/execute/pr61682.c test 
catches same issue with -Os -fmodulo-sched-allow-regmoves with some 
non-zero sms-dfa-history parameter values, so I added that test using 
#include as second test into the patch.

Minor separate patch about modulo-sched parameters is also attached.
If no objection, I'll commit this two patches into trunk tomorrow 
together with my PR90001 fix.

Trunk and 8/9 branches succesfully regstrapped on x64, and 
cross-compiler check-gcc tested on ppc, ppc64, arm, aarch64, ia64 and 
s390. Certainly a lot of testing were also done with changing default 
sms-dfa-history value to some other than zero.

Roman

gcc/ChangeLog:

2019-12-10  Roman Zhuykov  <zhroma@ispras.ru>

	* modulo-sched.c (ps_add_node_check_conflicts): Improve checking
	for history > 0 case.

gcc/testsuite/ChangeLog:

2019-12-10  Roman Zhuykov  <zhroma@ispras.ru>

	* gcc.dg/pr92951-1.c: New test.
	* gcc.dg/pr92951-2.c: New test.
gcc/ChangeLog:

2019-11-26  Roman Zhuykov  <zhroma@ispras.ru>

	* modulo-sched.c (sms_schedule): Use param_sms_max_ii_factor
	value instead of macro.  Adjust comment.
	(sms_schedule_by_order): Use parameter value without macro.
	* params.opt: Add ranges for modulo scheduler parameters,
	set param_sms_max_ii_factor = 2 by default.

diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 98af549..2dc9af7 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -1331,9 +1331,6 @@ setup_sched_infos (void)
    version may be entered.  Just a guess.  */
 #define PROB_SMS_ENOUGH_ITERATIONS 80
 
-/* Used to calculate the upper bound of ii.  */
-#define MAXII_FACTOR 2
-
 /* Main entry point, perform SMS scheduling on the loops of the function
    that consist of single basic blocks.  */
 static void
@@ -1597,7 +1594,7 @@ sms_schedule (void)
       rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
       mii = MAX (res_MII (g), rec_mii);
       mii = MAX (mii, 1);
-      maxii = MAX (max_asap, MAXII_FACTOR * mii);
+      maxii = MAX (max_asap, param_sms_max_ii_factor * mii);
 
       if (dump_file)
 	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
@@ -1636,7 +1633,7 @@ sms_schedule (void)
 	      gcc_assert (stage_count >= 1);
 	    }
 
-	  /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
+	  /* The default value of param_sms_min_sc is 2 as stage count of
 	     1 means that there is no interleaving between iterations thus
 	     we let the scheduling passes do the job in this case.  */
 	  if (stage_count < param_sms_min_sc
@@ -1828,11 +1825,6 @@ sms_schedule (void)
    The window would then start and end on the same row, but with
    different "must precede" and "must follow" requirements.  */
 
-/* A limit on the number of cycles that resource conflicts can span.  ??? Should
-   be provided by DFA, and be dependent on the type of insn scheduled.  Currently
-   set to 0 to save compile time.  */
-#define DFA_HISTORY param_sms_dfa_history
-
 /* A threshold for the number of repeated unsuccessful attempts to insert
    an empty row, before we flush the partial schedule and start over.  */
 #define MAX_SPLIT_NUM 10
@@ -2136,7 +2128,12 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
   auto_sbitmap must_follow (num_nodes);
   auto_sbitmap tobe_scheduled (num_nodes);
 
-  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
+  /* Value of param_sms_dfa_history is a limit on the number of cycles that
+     resource conflicts can span.  ??? Should be provided by DFA, and be
+     dependent on the type of insn scheduled.  Set to 0 by default to save
+     compile time.  */
+  partial_schedule_ptr ps = create_partial_schedule (ii, g,
+						     param_sms_dfa_history);
 
   bitmap_ones (tobe_scheduled);
   bitmap_clear (sched_nodes);
diff --git a/gcc/params.opt b/gcc/params.opt
index 586b539..d960f4a 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -809,7 +809,7 @@ Common Joined UInteger Var(param_slp_max_insns_in_bb) Init(1000) Param
 Maximum number of instructions in basic block to be considered for SLP vectorization.
 
 -param=sms-dfa-history=
-Common Joined UInteger Var(param_sms_dfa_history) Param
+Common Joined UInteger Var(param_sms_dfa_history) IntegerRange(0, 16) Param
 The number of cycles the swing modulo scheduler considers when checking conflicts using DFA.
 
 -param=sms-loop-average-count-threshold=
@@ -817,11 +817,11 @@ Common Joined UInteger Var(param_sms_loop_average_count_threshold) Param
 A threshold on the average loop count considered by the swing modulo scheduler.
 
 -param=sms-max-ii-factor=
-Common Joined UInteger Var(param_sms_max_ii_factor) Init(100) Param
+Common Joined UInteger Var(param_sms_max_ii_factor) Init(2) IntegerRange(1, 16) Param
 A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop.
 
 -param=sms-min-sc=
-Common Joined UInteger Var(param_sms_min_sc) Init(2) IntegerRange(1, 65536) Param
+Common Joined UInteger Var(param_sms_min_sc) Init(2) IntegerRange(1, 2) Param
 The minimum value of stage count that swing modulo scheduler will generate.
 
 -param=sra-max-scalarization-size-Osize=

Comments

Roman Zhuykov Dec. 17, 2019, 7:40 p.m. UTC | #1
Hello.

> As pointed out in the PR
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92591#c1, the test can be
> fixed by DFA-checking more adjacent row sequences in the partial
> schedule.
> I've found that on powerpc64 gcc.c-torture/execute/pr61682.c test
> catches same issue with -Os -fmodulo-sched-allow-regmoves with some
> non-zero sms-dfa-history parameter values, so I added that test using
> #include as second test into the patch.
> 
> Minor separate patch about modulo-sched parameters is also attached.
> If no objection, I'll commit this two patches into trunk tomorrow
> together with my PR90001 fix.
> 
> Trunk and 8/9 branches succesfully regstrapped on x64, and
> cross-compiler check-gcc tested on ppc, ppc64, arm, aarch64, ia64 and
> s390. Certainly a lot of testing were also done with changing default
> sms-dfa-history value to some other than zero.

I think this should be backported into 9 and 8 branches, because second 
example gives an ICE there.  But I'm not sure about backporting 
sms-dfa-history upper bound limitation (<=16) into params.def in 
branches.  Compile-time may grow dramatically for huge values like 1000, 
so we have to limit it.  Is it ok to limit the parameter, or maybe it's 
better to implement some "history=MIN(history, 16)" logic in 
modulo-sched.c ?

I see that sometimes parameter limitation is backported, examples are:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80663
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79576

While at it, maybe you have some thoughts about selected value of 16.  
Maximum reasonable value for sms-dfa-history param seems to be max 
latency between two insns on target platform (calculated by dep_cost 
function in haifa-sched.c).

I'm posting full backport patch here, it suits 8/9 branches. Jakub and 
Richard, is it OK ?

Roman

Backport from mainline
gcc/ChangeLog:

2019-12-17  Roman Zhuykov  <zhroma@ispras.ru>

	* modulo-sched.c (ps_add_node_check_conflicts): Improve checking
	for history > 0 case.
	* params.def (sms-dfa-history): Limit to 16.

gcc/testsuite/ChangeLog:

2019-12-17  Roman Zhuykov  <zhroma@ispras.ru>

	* gcc.dg/pr92951-1.c: New test.
	* gcc.dg/pr92951-2.c: New test.


diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -3209,7 +3209,7 @@ ps_add_node_check_conflicts (partial_schedule_ptr 
ps, int n,
     			     int c, sbitmap must_precede,
  			     sbitmap must_follow)
  {
-  int has_conflicts = 0;
+  int i, first, amount, has_conflicts = 0;
    ps_insn_ptr ps_i;

    /* First add the node to the PS, if this succeeds check for
@@ -3217,23 +3217,32 @@ ps_add_node_check_conflicts 
(partial_schedule_ptr ps, int n,
    if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
      return NULL; /* Failed to insert the node at the given cycle.  */

-  has_conflicts = ps_has_conflicts (ps, c, c)
-		  || (ps->history > 0
-		      && ps_has_conflicts (ps,
-					   c - ps->history,
-					   c + ps->history));
-
-  /* Try different issue slots to find one that the given node can be
-     scheduled in without conflicts.  */
-  while (has_conflicts)
+  while (1)
      {
+      has_conflicts = ps_has_conflicts (ps, c, c);
+      if (ps->history > 0 && !has_conflicts)
+	{
+	  /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
+	     but not more than ii intervals.  */
+	  first = c - ps->history;
+	  amount = 2 * ps->history + 1;
+	  if (amount > ps->ii)
+	    amount = ps->ii;
+	  for (i = first; i < first + amount; i++)
+	    {
+	      has_conflicts = ps_has_conflicts (ps,
+						i - ps->history,
+						i + ps->history);
+	      if (has_conflicts)
+		break;
+	    }
+	}
+      if (!has_conflicts)
+	break;
+      /* Try different issue slots to find one that the given node can 
be
+	 scheduled in without conflicts.  */
        if (! ps_insn_advance_column (ps, ps_i, must_follow))
  	break;
-      has_conflicts = ps_has_conflicts (ps, c, c)
-		      || (ps->history > 0
-			  && ps_has_conflicts (ps,
-					       c - ps->history,
-					       c + ps->history));
      }

    if (has_conflicts)
diff --git a/gcc/testsuite/gcc.dg/pr92951-1.c 
b/gcc/testsuite/gcc.dg/pr92951-1.c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr92951-1.c
@@ -0,0 +1,11 @@
+/* PR rtl-optimization/92591 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fmodulo-sched -fweb -fno-dce -fno-ivopts 
-fno-sched-pressure -fno-tree-loop-distribute-patterns --param 
sms-dfa-history=1" } */
+/* { dg-additional-options "-mcpu=e500mc" { target { powerpc-*-* } } } 
*/
+
+void
+wf (char *mr, int tc)
+{
+  while (tc-- > 0)
+    *mr++ = 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr92951-2.c 
b/gcc/testsuite/gcc.dg/pr92951-2.c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr92951-2.c
@@ -0,0 +1,5 @@
+/* PR rtl-optimization/92591 */
+/* { dg-do compile } */
+/* { dg-options "-Os -fmodulo-sched -fmodulo-sched-allow-regmoves 
--param sms-dfa-history=8" } */
+
+#include "../gcc.c-torture/execute/pr61682.c"
diff --git a/gcc/params.def b/gcc/params.def
index e3ad05f..74215f2 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -387,7 +387,7 @@ DEFPARAM(PARAM_SMS_MIN_SC,
  DEFPARAM(PARAM_SMS_DFA_HISTORY,
  	 "sms-dfa-history",
  	 "The number of cycles the swing modulo scheduler considers when 
checking conflicts using DFA.",
-	 0, 0, 0)
+	 0, 0, 16)
  DEFPARAM(PARAM_SMS_LOOP_AVERAGE_COUNT_THRESHOLD,
  	 "sms-loop-average-count-threshold",
  	 "A threshold on the average loop count considered by the swing modulo 
scheduler.",
Richard Biener Dec. 18, 2019, 11:09 a.m. UTC | #2
On December 17, 2019 8:40:59 PM GMT+01:00, Roman Zhuykov <zhroma@ispras.ru> wrote:
>Hello.
>
>> As pointed out in the PR
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92591#c1, the test can
>be
>> fixed by DFA-checking more adjacent row sequences in the partial
>> schedule.
>> I've found that on powerpc64 gcc.c-torture/execute/pr61682.c test
>> catches same issue with -Os -fmodulo-sched-allow-regmoves with some
>> non-zero sms-dfa-history parameter values, so I added that test using
>> #include as second test into the patch.
>> 
>> Minor separate patch about modulo-sched parameters is also attached.
>> If no objection, I'll commit this two patches into trunk tomorrow
>> together with my PR90001 fix.
>> 
>> Trunk and 8/9 branches succesfully regstrapped on x64, and
>> cross-compiler check-gcc tested on ppc, ppc64, arm, aarch64, ia64 and
>> s390. Certainly a lot of testing were also done with changing default
>> sms-dfa-history value to some other than zero.
>
>I think this should be backported into 9 and 8 branches, because second
>
>example gives an ICE there.  But I'm not sure about backporting 
>sms-dfa-history upper bound limitation (<=16) into params.def in 
>branches.  Compile-time may grow dramatically for huge values like
>1000, 
>so we have to limit it.  Is it ok to limit the parameter, or maybe it's
>
>better to implement some "history=MIN(history, 16)" logic in 
>modulo-sched.c ?
>
>I see that sometimes parameter limitation is backported, examples are:
>https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80663
>https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79576
>
>While at it, maybe you have some thoughts about selected value of 16.  
>Maximum reasonable value for sms-dfa-history param seems to be max 
>latency between two insns on target platform (calculated by dep_cost 
>function in haifa-sched.c).
>
>I'm posting full backport patch here, it suits 8/9 branches. Jakub and 
>Richard, is it OK ?

Ok with me. 

Richard. 

>Roman
>
>Backport from mainline
>gcc/ChangeLog:
>
>2019-12-17  Roman Zhuykov  <zhroma@ispras.ru>
>
>	* modulo-sched.c (ps_add_node_check_conflicts): Improve checking
>	for history > 0 case.
>	* params.def (sms-dfa-history): Limit to 16.
>
>gcc/testsuite/ChangeLog:
>
>2019-12-17  Roman Zhuykov  <zhroma@ispras.ru>
>
>	* gcc.dg/pr92951-1.c: New test.
>	* gcc.dg/pr92951-2.c: New test.
>
>
>diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
>--- a/gcc/modulo-sched.c
>+++ b/gcc/modulo-sched.c
>@@ -3209,7 +3209,7 @@ ps_add_node_check_conflicts (partial_schedule_ptr
>
>ps, int n,
>     			     int c, sbitmap must_precede,
>  			     sbitmap must_follow)
>  {
>-  int has_conflicts = 0;
>+  int i, first, amount, has_conflicts = 0;
>    ps_insn_ptr ps_i;
>
>    /* First add the node to the PS, if this succeeds check for
>@@ -3217,23 +3217,32 @@ ps_add_node_check_conflicts 
>(partial_schedule_ptr ps, int n,
>   if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
>      return NULL; /* Failed to insert the node at the given cycle.  */
>
>-  has_conflicts = ps_has_conflicts (ps, c, c)
>-		  || (ps->history > 0
>-		      && ps_has_conflicts (ps,
>-					   c - ps->history,
>-					   c + ps->history));
>-
>-  /* Try different issue slots to find one that the given node can be
>-     scheduled in without conflicts.  */
>-  while (has_conflicts)
>+  while (1)
>      {
>+      has_conflicts = ps_has_conflicts (ps, c, c);
>+      if (ps->history > 0 && !has_conflicts)
>+	{
>+	  /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
>+	     but not more than ii intervals.  */
>+	  first = c - ps->history;
>+	  amount = 2 * ps->history + 1;
>+	  if (amount > ps->ii)
>+	    amount = ps->ii;
>+	  for (i = first; i < first + amount; i++)
>+	    {
>+	      has_conflicts = ps_has_conflicts (ps,
>+						i - ps->history,
>+						i + ps->history);
>+	      if (has_conflicts)
>+		break;
>+	    }
>+	}
>+      if (!has_conflicts)
>+	break;
>+      /* Try different issue slots to find one that the given node can
>
>be
>+	 scheduled in without conflicts.  */
>        if (! ps_insn_advance_column (ps, ps_i, must_follow))
>  	break;
>-      has_conflicts = ps_has_conflicts (ps, c, c)
>-		      || (ps->history > 0
>-			  && ps_has_conflicts (ps,
>-					       c - ps->history,
>-					       c + ps->history));
>      }
>
>    if (has_conflicts)
>diff --git a/gcc/testsuite/gcc.dg/pr92951-1.c 
>b/gcc/testsuite/gcc.dg/pr92951-1.c
>--- /dev/null
>+++ b/gcc/testsuite/gcc.dg/pr92951-1.c
>@@ -0,0 +1,11 @@
>+/* PR rtl-optimization/92591 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fmodulo-sched -fweb -fno-dce -fno-ivopts 
>-fno-sched-pressure -fno-tree-loop-distribute-patterns --param 
>sms-dfa-history=1" } */
>+/* { dg-additional-options "-mcpu=e500mc" { target { powerpc-*-* } } }
>
>*/
>+
>+void
>+wf (char *mr, int tc)
>+{
>+  while (tc-- > 0)
>+    *mr++ = 0;
>+}
>diff --git a/gcc/testsuite/gcc.dg/pr92951-2.c 
>b/gcc/testsuite/gcc.dg/pr92951-2.c
>--- /dev/null
>+++ b/gcc/testsuite/gcc.dg/pr92951-2.c
>@@ -0,0 +1,5 @@
>+/* PR rtl-optimization/92591 */
>+/* { dg-do compile } */
>+/* { dg-options "-Os -fmodulo-sched -fmodulo-sched-allow-regmoves 
>--param sms-dfa-history=8" } */
>+
>+#include "../gcc.c-torture/execute/pr61682.c"
>diff --git a/gcc/params.def b/gcc/params.def
>index e3ad05f..74215f2 100644
>--- a/gcc/params.def
>+++ b/gcc/params.def
>@@ -387,7 +387,7 @@ DEFPARAM(PARAM_SMS_MIN_SC,
>  DEFPARAM(PARAM_SMS_DFA_HISTORY,
>  	 "sms-dfa-history",
>  	 "The number of cycles the swing modulo scheduler considers when 
>checking conflicts using DFA.",
>-	 0, 0, 0)
>+	 0, 0, 16)
>  DEFPARAM(PARAM_SMS_LOOP_AVERAGE_COUNT_THRESHOLD,
>  	 "sms-loop-average-count-threshold",
>	 "A threshold on the average loop count considered by the swing modulo
>
>scheduler.",
diff mbox series

Patch

diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -3200,7 +3200,7 @@  ps_add_node_check_conflicts (partial_schedule_ptr 
ps, int n,
     			     int c, sbitmap must_precede,
  			     sbitmap must_follow)
  {
-  int has_conflicts = 0;
+  int i, first, amount, has_conflicts = 0;
    ps_insn_ptr ps_i;

    /* First add the node to the PS, if this succeeds check for
@@ -3208,23 +3208,32 @@  ps_add_node_check_conflicts 
(partial_schedule_ptr ps, int n,
    if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
      return NULL; /* Failed to insert the node at the given cycle.  */

-  has_conflicts = ps_has_conflicts (ps, c, c)
-		  || (ps->history > 0
-		      && ps_has_conflicts (ps,
-					   c - ps->history,
-					   c + ps->history));
-
-  /* Try different issue slots to find one that the given node can be
-     scheduled in without conflicts.  */
-  while (has_conflicts)
+  while (1)
      {
+      has_conflicts = ps_has_conflicts (ps, c, c);
+      if (ps->history > 0 && !has_conflicts)
+	{
+	  /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h,
+	     but not more than ii intervals.  */
+	  first = c - ps->history;
+	  amount = 2 * ps->history + 1;
+	  if (amount > ps->ii)
+	    amount = ps->ii;
+	  for (i = first; i < first + amount; i++)
+	    {
+	      has_conflicts = ps_has_conflicts (ps,
+						i - ps->history,
+						i + ps->history);
+	      if (has_conflicts)
+		break;
+	    }
+	}
+      if (!has_conflicts)
+	break;
+      /* Try different issue slots to find one that the given node can 
be
+	 scheduled in without conflicts.  */
        if (! ps_insn_advance_column (ps, ps_i, must_follow))
  	break;
-      has_conflicts = ps_has_conflicts (ps, c, c)
-		      || (ps->history > 0
-			  && ps_has_conflicts (ps,
-					       c - ps->history,
-					       c + ps->history));
      }

    if (has_conflicts)
diff --git a/gcc/testsuite/gcc.dg/pr92951-1.c 
b/gcc/testsuite/gcc.dg/pr92951-1.c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr92951-1.c
@@ -0,0 +1,11 @@ 
+/* PR rtl-optimization/92591 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fmodulo-sched -fweb -fno-dce -fno-ivopts 
-fno-sched-pressure -fno-tree-loop-distribute-patterns --param 
sms-dfa-history=1" } */
+/* { dg-additional-options "-mcpu=e500mc" { target { powerpc-*-* } } } 
*/
+
+void
+wf (char *mr, int tc)
+{
+  while (tc-- > 0)
+    *mr++ = 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr92951-2.c 
b/gcc/testsuite/gcc.dg/pr92951-2.c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr92951-2.c
@@ -0,0 +1,5 @@ 
+/* PR rtl-optimization/92591 */
+/* { dg-do compile } */
+/* { dg-options "-Os -fmodulo-sched -fmodulo-sched-allow-regmoves 
--param sms-dfa-history=8" } */
+
+#include "../gcc.c-torture/execute/pr61682.c"