[1/2] S/390: Handle long-running instructions

Message ID 20171011115338.4072-1-rdapp@linux.vnet.ibm.com
State New
Headers show
Series
  • [1/2] S/390: Handle long-running instructions
Related show

Commit Message

Robin Dapp Oct. 11, 2017, 11:53 a.m.
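This patch introduces balancing of long-running instructions, which may
otherwise clog the pipeline, across the two sides of the execution units.  The
scheduler lowers the score of a long-running instruction while the current side
is still occupied by an earlier one and is at least as busy as the other side.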


gcc/ChangeLog:

2017-10-11  Robin Dapp  <rdapp@linux.vnet.ibm.com>

        * config/s390/s390.c (NUM_SIDES): New constant.
        (LONGRUNNING_THRESHOLD): New constant.
        (LATENCY_FACTOR): New constant.
        (s390_sched_score): Lower score for long-running instructions when
        the current side is still busy with an earlier one.
        (s390_sched_variable_issue): Bookkeeping for long-running instructions.


---
 gcc/config/s390/s390.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

Comments

Andreas Krebbel Oct. 16, 2017, 9:15 a.m. | #1
On 10/11/2017 01:53 PM, Robin Dapp wrote:
...
> @@ -14623,8 +14659,13 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
>  	    case 1:
>  	    case 2:
>  	    case S390_SCHED_STATE_NORMAL:
> +	      if (s390_sched_state == 0)
> +		starts_group = true;
>  	      if (s390_sched_state == S390_SCHED_STATE_NORMAL)
> -		s390_sched_state = 1;
> +		{
> +		  starts_group = true;
> +		  s390_sched_state = 1;
> +		}

Should be the same as:

	    case 0:
	      starts_group = true;
	      /* fallthru */
	    case 1:
	    case 2:
	      s390_sched_state++;
	      break;
	    case S390_SCHED_STATE_NORMAL:
	      starts_group = true;
	      s390_sched_state = 1;
	      break;
	    case S390_SCHED_STATE_CRACKED:
	      s390_sched_state = S390_SCHED_STATE_NORMAL;
	      break;
	    }

Ok with that change. Thanks!

-Andreas-

Patch

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 36bc67d..2430933 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -355,6 +355,18 @@  static rtx_insn *last_scheduled_insn;
 #define MAX_SCHED_UNITS 3
 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
 
+#define NUM_SIDES 2
+static int current_side = 1;
+#define LONGRUNNING_THRESHOLD 5
+
+/* Estimate of number of cycles a long-running insn occupies an
+   execution unit.  */
+static unsigned fxu_longrunning[NUM_SIDES];
+static unsigned vfu_longrunning[NUM_SIDES];
+
+/* Factor to scale latencies by, determined by measurements.  */
+#define LATENCY_FACTOR 4
+
 /* The maximum score added for an instruction whose unit hasn't been
    in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
    give instruction mix scheduling more priority over instruction
@@ -14483,7 +14495,24 @@  s390_sched_score (rtx_insn *insn)
 	if (m & unit_mask)
 	  score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
 		    MAX_SCHED_MIX_DISTANCE);
+
+      unsigned latency = insn_default_latency (insn);
+
+      int other_side = 1 - current_side;
+
+      /* Try to delay long-running insns when side is busy.  */
+      if (latency > LONGRUNNING_THRESHOLD)
+	{
+	  if (get_attr_z13_unit_fxu (insn) && fxu_longrunning[current_side]
+	      && fxu_longrunning[other_side] <= fxu_longrunning[current_side])
+	    score = MAX (0, score - 10);
+
+	  if (get_attr_z13_unit_vfu (insn) && vfu_longrunning[current_side]
+	      && vfu_longrunning[other_side] <= vfu_longrunning[current_side])
+	    score = MAX (0, score - 10);
+	}
     }
+
   return score;
 }
 
@@ -14602,6 +14631,8 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 {
   last_scheduled_insn = insn;
 
+  bool starts_group = false;
+
   if (s390_tune >= PROCESSOR_2827_ZEC12
       && reload_completed
       && recog_memoized (insn) >= 0)
@@ -14609,6 +14640,11 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
       unsigned int mask = s390_get_sched_attrmask (insn);
 
       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
+	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
+	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
+	starts_group = true;
+
+      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
 	s390_sched_state = S390_SCHED_STATE_CRACKED;
       else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
@@ -14623,8 +14659,13 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 	    case 1:
 	    case 2:
 	    case S390_SCHED_STATE_NORMAL:
+	      if (s390_sched_state == 0)
+		starts_group = true;
 	      if (s390_sched_state == S390_SCHED_STATE_NORMAL)
-		s390_sched_state = 1;
+		{
+		  starts_group = true;
+		  s390_sched_state = 1;
+		}
 	      else
 		s390_sched_state++;
 
@@ -14650,6 +14691,27 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 	      last_scheduled_unit_distance[i]++;
 	}
 
+      /* If this insn started a new group, the side flipped.  */
+      if (starts_group)
+	current_side = current_side ? 0 : 1;
+
+      for (int i = 0; i < 2; i++)
+	{
+	  if (fxu_longrunning[i] >= 1)
+	    fxu_longrunning[i] -= 1;
+	  if (vfu_longrunning[i] >= 1)
+	    vfu_longrunning[i] -= 1;
+	}
+
+      unsigned latency = insn_default_latency (insn);
+      if (latency > LONGRUNNING_THRESHOLD)
+	{
+	  if (get_attr_z13_unit_fxu (insn))
+	    fxu_longrunning[current_side] = latency * LATENCY_FACTOR;
+	  else
+	    vfu_longrunning[current_side] = latency * LATENCY_FACTOR;
+	}
+
       if (verbose > 5)
 	{
 	  unsigned int sched_mask;