diff mbox series

rs6000: Refactor scheduling hook code

Message ID 36220a14-50fd-a035-9da6-f554426aac0c@linux.ibm.com
State New
Headers show
Series rs6000: Refactor scheduling hook code | expand

Commit Message

Pat Haugen Nov. 18, 2019, 9:24 p.m. UTC
The following patch factors out some common code to its own function and also moves the Power6 specific code to a new function.

Bootstrap/regtest on powerpc64le with no regressions. Ok for trunk?

-Pat


2019-11-18  Pat Haugen  <pthaugen@us.ibm.com>

	* config/rs6000/rs6000.c (move_to_end_of_ready): New, factored out
	from common code.
	(power6_sched_reorder2): Factored out from rs6000_sched_reorder2,
	call new function.
	(power9_sched_reorder2): Call new function.
	(rs6000_sched_reorder2): Likewise.

Comments

Segher Boessenkool Nov. 19, 2019, 12:36 a.m. UTC | #1
Hi!

On Mon, Nov 18, 2019 at 03:24:56PM -0600, Pat Haugen wrote:
> The following patch factors out some common code to its own function and also moves the Power6 specific code to a new function.

If you had done the latter as a separate (earlier) patch, it would be
easier to review: now it is pretty hard to check if the changes to the
power6 code are correct.  I'll just trust you got it right, the power9
part looks fine, so okay for trunk.  Thanks!


Segher


> 2019-11-18  Pat Haugen  <pthaugen@us.ibm.com>
> 
> 	* config/rs6000/rs6000.c (move_to_end_of_ready): New, factored out
> 	from common code.
> 	(power6_sched_reorder2): Factored out from rs6000_sched_reorder2,
> 	call new function.
> 	(power9_sched_reorder2): Call new function.
> 	(rs6000_sched_reorder2): Likewise.
diff mbox series

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 278306)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -17711,14 +17711,216 @@  get_next_active_insn (rtx_insn *insn, rt
   return insn;
 }
 
+/* Move instruction at POS to the end of the READY list.  */
+
+static void
+move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
+{
+  rtx_insn *tmp;
+  int i;
+
+  tmp = ready[pos];
+  for (i = pos; i < lastpos; i++)
+    ready[i] = ready[i + 1];
+  ready[lastpos] = tmp;
+}
+
+/* Do Power6 specific sched_reorder2 reordering of ready list.  */
+
+static int
+power6_sched_reorder2 (rtx_insn **ready, int lastpos)
+{
+  /* For Power6, we need to handle some special cases to try and keep the
+     store queue from overflowing and triggering expensive flushes.
+
+     This code monitors how load and store instructions are being issued
+     and skews the ready list one way or the other to increase the likelihood
+     that a desired instruction is issued at the proper time.
+
+     A couple of things are done.  First, we maintain a "load_store_pendulum"
+     to track the current state of load/store issue.
+
+       - If the pendulum is at zero, then no loads or stores have been
+	 issued in the current cycle so we do nothing.
+
+       - If the pendulum is 1, then a single load has been issued in this
+	 cycle and we attempt to locate another load in the ready list to
+	 issue with it.
+
+       - If the pendulum is -2, then two stores have already been
+	 issued in this cycle, so we increase the priority of the first load
+	 in the ready list to increase it's likelihood of being chosen first
+	 in the next cycle.
+
+       - If the pendulum is -1, then a single store has been issued in this
+	 cycle and we attempt to locate another store in the ready list to
+	 issue with it, preferring a store to an adjacent memory location to
+	 facilitate store pairing in the store queue.
+
+       - If the pendulum is 2, then two loads have already been
+	 issued in this cycle, so we increase the priority of the first store
+	 in the ready list to increase it's likelihood of being chosen first
+	 in the next cycle.
+
+       - If the pendulum < -2 or > 2, then do nothing.
+
+       Note: This code covers the most common scenarios.  There exist non
+	     load/store instructions which make use of the LSU and which
+	     would need to be accounted for to strictly model the behavior
+	     of the machine.  Those instructions are currently unaccounted
+	     for to help minimize compile time overhead of this code.
+   */
+  int pos;
+  rtx load_mem, str_mem;
+
+  if (is_store_insn (last_scheduled_insn, &str_mem))
+    /* Issuing a store, swing the load_store_pendulum to the left */
+    load_store_pendulum--;
+  else if (is_load_insn (last_scheduled_insn, &load_mem))
+    /* Issuing a load, swing the load_store_pendulum to the right */
+    load_store_pendulum++;
+  else
+    return cached_can_issue_more;
+
+  /* If the pendulum is balanced, or there is only one instruction on
+     the ready list, then all is well, so return. */
+  if ((load_store_pendulum == 0) || (lastpos <= 0))
+    return cached_can_issue_more;
+
+  if (load_store_pendulum == 1)
+    {
+      /* A load has been issued in this cycle.  Scan the ready list
+	 for another load to issue with it */
+      pos = lastpos;
+
+      while (pos >= 0)
+	{
+	  if (is_load_insn (ready[pos], &load_mem))
+	    {
+	      /* Found a load.  Move it to the head of the ready list,
+		 and adjust it's priority so that it is more likely to
+		 stay there */
+	      move_to_end_of_ready (ready, pos, lastpos);
+
+	      if (!sel_sched_p ()
+		  && INSN_PRIORITY_KNOWN (ready[lastpos]))
+		INSN_PRIORITY (ready[lastpos])++;
+	      break;
+	    }
+	  pos--;
+	}
+    }
+  else if (load_store_pendulum == -2)
+    {
+      /* Two stores have been issued in this cycle.  Increase the
+	 priority of the first load in the ready list to favor it for
+	 issuing in the next cycle. */
+      pos = lastpos;
+
+      while (pos >= 0)
+	{
+	  if (is_load_insn (ready[pos], &load_mem)
+	      && !sel_sched_p ()
+	      && INSN_PRIORITY_KNOWN (ready[pos]))
+	    {
+	      INSN_PRIORITY (ready[pos])++;
+
+	      /* Adjust the pendulum to account for the fact that a load
+		 was found and increased in priority.  This is to prevent
+		 increasing the priority of multiple loads */
+	      load_store_pendulum--;
+
+	      break;
+	    }
+	  pos--;
+	}
+    }
+  else if (load_store_pendulum == -1)
+    {
+      /* A store has been issued in this cycle.  Scan the ready list for
+	 another store to issue with it, preferring a store to an adjacent
+	 memory location */
+      int first_store_pos = -1;
+
+      pos = lastpos;
+
+      while (pos >= 0)
+	{
+	  if (is_store_insn (ready[pos], &str_mem))
+	    {
+	      rtx str_mem2;
+	      /* Maintain the index of the first store found on the
+		 list */
+	      if (first_store_pos == -1)
+		first_store_pos = pos;
+
+	      if (is_store_insn (last_scheduled_insn, &str_mem2)
+		  && adjacent_mem_locations (str_mem, str_mem2))
+		{
+		  /* Found an adjacent store.  Move it to the head of the
+		     ready list, and adjust it's priority so that it is
+		     more likely to stay there */
+		  move_to_end_of_ready (ready, pos, lastpos);
+
+		  if (!sel_sched_p ()
+		      && INSN_PRIORITY_KNOWN (ready[lastpos]))
+		    INSN_PRIORITY (ready[lastpos])++;
+
+		  first_store_pos = -1;
+
+		  break;
+		};
+	    }
+	  pos--;
+	}
+
+      if (first_store_pos >= 0)
+	{
+	  /* An adjacent store wasn't found, but a non-adjacent store was,
+	     so move the non-adjacent store to the front of the ready
+	     list, and adjust its priority so that it is more likely to
+	     stay there. */
+	  move_to_end_of_ready (ready, first_store_pos, lastpos);
+	  if (!sel_sched_p ()
+	      && INSN_PRIORITY_KNOWN (ready[lastpos]))
+	    INSN_PRIORITY (ready[lastpos])++;
+	}
+    }
+  else if (load_store_pendulum == 2)
+    {
+      /* Two loads have been issued in this cycle.  Increase the priority
+	 of the first store in the ready list to favor it for issuing in
+	 the next cycle. */
+      pos = lastpos;
+
+      while (pos >= 0)
+	{
+	  if (is_store_insn (ready[pos], &str_mem)
+	      && !sel_sched_p ()
+	      && INSN_PRIORITY_KNOWN (ready[pos]))
+	    {
+	      INSN_PRIORITY (ready[pos])++;
+
+	      /* Adjust the pendulum to account for the fact that a store
+		 was found and increased in priority.  This is to prevent
+		 increasing the priority of multiple stores */
+	      load_store_pendulum++;
+
+	      break;
+	    }
+	  pos--;
+	}
+    }
+
+  return cached_can_issue_more;
+}
+
 /* Do Power9 specific sched_reorder2 reordering of ready list.  */
 
 static int
 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
 {
   int pos;
-  int i;
-  rtx_insn *tmp;
   enum attr_type type, type2;
 
   type = get_attr_type (last_scheduled_insn);
@@ -17738,10 +17940,7 @@  power9_sched_reorder2 (rtx_insn **ready,
 	  if (recog_memoized (ready[pos]) >= 0
 	      && get_attr_type (ready[pos]) == TYPE_DIV)
 	    {
-	      tmp = ready[pos];
-	      for (i = pos; i < lastpos; i++)
-		ready[i] = ready[i + 1];
-	      ready[lastpos] = tmp;
+	      move_to_end_of_ready (ready, pos, lastpos);
 	      break;
 	    }
 	  pos--;
@@ -17784,10 +17983,7 @@  power9_sched_reorder2 (rtx_insn **ready,
 			{
 			  /* Found a vector insn to pair with, move it to the
 			     end of the ready list so it is scheduled next.  */
-			  tmp = ready[pos];
-			  for (i = pos; i < lastpos; i++)
-			    ready[i] = ready[i + 1];
-			  ready[lastpos] = tmp;
+			  move_to_end_of_ready (ready, pos, lastpos);
 			  vec_pairing = 1;
 			  return cached_can_issue_more;
 			}
@@ -17801,10 +17997,7 @@  power9_sched_reorder2 (rtx_insn **ready,
 		{
 		  /* Didn't find a vector to pair with but did find a vecload,
 		     move it to the end of the ready list.  */
-		  tmp = ready[vecload_pos];
-		  for (i = vecload_pos; i < lastpos; i++)
-		    ready[i] = ready[i + 1];
-		  ready[lastpos] = tmp;
+		  move_to_end_of_ready (ready, vecload_pos, lastpos);
 		  vec_pairing = 1;
 		  return cached_can_issue_more;
 		}
@@ -17828,10 +18021,7 @@  power9_sched_reorder2 (rtx_insn **ready,
 			{
 			  /* Found a vecload insn to pair with, move it to the
 			     end of the ready list so it is scheduled next.  */
-			  tmp = ready[pos];
-			  for (i = pos; i < lastpos; i++)
-			    ready[i] = ready[i + 1];
-			  ready[lastpos] = tmp;
+			  move_to_end_of_ready (ready, pos, lastpos);
 			  vec_pairing = 1;
 			  return cached_can_issue_more;
 			}
@@ -17846,10 +18036,7 @@  power9_sched_reorder2 (rtx_insn **ready,
 		{
 		  /* Didn't find a vecload to pair with but did find a vector
 		     insn, move it to the end of the ready list.  */
-		  tmp = ready[vec_pos];
-		  for (i = vec_pos; i < lastpos; i++)
-		    ready[i] = ready[i + 1];
-		  ready[lastpos] = tmp;
+		  move_to_end_of_ready (ready, vec_pos, lastpos);
 		  vec_pairing = 1;
 		  return cached_can_issue_more;
 		}
@@ -17903,198 +18090,9 @@  rs6000_sched_reorder2 (FILE *dump, int s
   if (sched_verbose)
     fprintf (dump, "// rs6000_sched_reorder2 :\n");
 
-  /* For Power6, we need to handle some special cases to try and keep the
-     store queue from overflowing and triggering expensive flushes.
-
-     This code monitors how load and store instructions are being issued
-     and skews the ready list one way or the other to increase the likelihood
-     that a desired instruction is issued at the proper time.
-
-     A couple of things are done.  First, we maintain a "load_store_pendulum"
-     to track the current state of load/store issue.
-
-       - If the pendulum is at zero, then no loads or stores have been
-         issued in the current cycle so we do nothing.
-
-       - If the pendulum is 1, then a single load has been issued in this
-         cycle and we attempt to locate another load in the ready list to
-         issue with it.
-
-       - If the pendulum is -2, then two stores have already been
-         issued in this cycle, so we increase the priority of the first load
-         in the ready list to increase it's likelihood of being chosen first
-         in the next cycle.
-
-       - If the pendulum is -1, then a single store has been issued in this
-         cycle and we attempt to locate another store in the ready list to
-         issue with it, preferring a store to an adjacent memory location to
-         facilitate store pairing in the store queue.
-
-       - If the pendulum is 2, then two loads have already been
-         issued in this cycle, so we increase the priority of the first store
-         in the ready list to increase it's likelihood of being chosen first
-         in the next cycle.
-
-       - If the pendulum < -2 or > 2, then do nothing.
-
-       Note: This code covers the most common scenarios.  There exist non
-             load/store instructions which make use of the LSU and which
-             would need to be accounted for to strictly model the behavior
-             of the machine.  Those instructions are currently unaccounted
-             for to help minimize compile time overhead of this code.
-   */
+  /* Do Power6 dependent reordering if necessary.  */
   if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
-    {
-      int pos;
-      int i;
-      rtx_insn *tmp;
-      rtx load_mem, str_mem;
-
-      if (is_store_insn (last_scheduled_insn, &str_mem))
-        /* Issuing a store, swing the load_store_pendulum to the left */
-        load_store_pendulum--;
-      else if (is_load_insn (last_scheduled_insn, &load_mem))
-        /* Issuing a load, swing the load_store_pendulum to the right */
-        load_store_pendulum++;
-      else
-        return cached_can_issue_more;
-
-      /* If the pendulum is balanced, or there is only one instruction on
-         the ready list, then all is well, so return. */
-      if ((load_store_pendulum == 0) || (*pn_ready <= 1))
-        return cached_can_issue_more;
-
-      if (load_store_pendulum == 1)
-        {
-          /* A load has been issued in this cycle.  Scan the ready list
-             for another load to issue with it */
-          pos = *pn_ready-1;
-
-          while (pos >= 0)
-            {
-              if (is_load_insn (ready[pos], &load_mem))
-                {
-                  /* Found a load.  Move it to the head of the ready list,
-                     and adjust it's priority so that it is more likely to
-                     stay there */
-                  tmp = ready[pos];
-                  for (i=pos; i<*pn_ready-1; i++)
-                    ready[i] = ready[i + 1];
-                  ready[*pn_ready-1] = tmp;
-
-                  if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
-                    INSN_PRIORITY (tmp)++;
-                  break;
-                }
-              pos--;
-            }
-        }
-      else if (load_store_pendulum == -2)
-        {
-          /* Two stores have been issued in this cycle.  Increase the
-             priority of the first load in the ready list to favor it for
-             issuing in the next cycle. */
-          pos = *pn_ready-1;
-
-          while (pos >= 0)
-            {
-              if (is_load_insn (ready[pos], &load_mem)
-                  && !sel_sched_p ()
-		  && INSN_PRIORITY_KNOWN (ready[pos]))
-                {
-                  INSN_PRIORITY (ready[pos])++;
-
-                  /* Adjust the pendulum to account for the fact that a load
-                     was found and increased in priority.  This is to prevent
-                     increasing the priority of multiple loads */
-                  load_store_pendulum--;
-
-                  break;
-                }
-              pos--;
-            }
-        }
-      else if (load_store_pendulum == -1)
-        {
-          /* A store has been issued in this cycle.  Scan the ready list for
-             another store to issue with it, preferring a store to an adjacent
-             memory location */
-          int first_store_pos = -1;
-
-          pos = *pn_ready-1;
-
-          while (pos >= 0)
-            {
-              if (is_store_insn (ready[pos], &str_mem))
-                {
-		  rtx str_mem2;
-                  /* Maintain the index of the first store found on the
-                     list */
-                  if (first_store_pos == -1)
-                    first_store_pos = pos;
-
-                  if (is_store_insn (last_scheduled_insn, &str_mem2)
-                      && adjacent_mem_locations (str_mem, str_mem2))
-                    {
-                      /* Found an adjacent store.  Move it to the head of the
-                         ready list, and adjust it's priority so that it is
-                         more likely to stay there */
-                      tmp = ready[pos];
-                      for (i=pos; i<*pn_ready-1; i++)
-                        ready[i] = ready[i + 1];
-                      ready[*pn_ready-1] = tmp;
-
-                      if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
-                        INSN_PRIORITY (tmp)++;
-
-                      first_store_pos = -1;
-
-                      break;
-                    };
-                }
-              pos--;
-            }
-
-          if (first_store_pos >= 0)
-            {
-              /* An adjacent store wasn't found, but a non-adjacent store was,
-                 so move the non-adjacent store to the front of the ready
-                 list, and adjust its priority so that it is more likely to
-                 stay there. */
-              tmp = ready[first_store_pos];
-              for (i=first_store_pos; i<*pn_ready-1; i++)
-                ready[i] = ready[i + 1];
-              ready[*pn_ready-1] = tmp;
-              if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
-                INSN_PRIORITY (tmp)++;
-            }
-        }
-      else if (load_store_pendulum == 2)
-       {
-           /* Two loads have been issued in this cycle.  Increase the priority
-              of the first store in the ready list to favor it for issuing in
-              the next cycle. */
-          pos = *pn_ready-1;
-
-          while (pos >= 0)
-            {
-              if (is_store_insn (ready[pos], &str_mem)
-                  && !sel_sched_p ()
-		  && INSN_PRIORITY_KNOWN (ready[pos]))
-                {
-                  INSN_PRIORITY (ready[pos])++;
-
-                  /* Adjust the pendulum to account for the fact that a store
-                     was found and increased in priority.  This is to prevent
-                     increasing the priority of multiple stores */
-                  load_store_pendulum++;
-
-                  break;
-                }
-              pos--;
-            }
-        }
-    }
+    return power6_sched_reorder2 (ready, *pn_ready - 1);
 
   /* Do Power9 dependent reordering if necessary.  */
   if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn