[2/2] S/390: Do not end groups after fallthru edge

Message ID 20171011115338.4072-2-rdapp@linux.vnet.ibm.com
State New
Headers show
Series
  • [1/2] S/390: Handle long-running instructions
Related show

Commit Message

Robin Dapp Oct. 11, 2017, 11:53 a.m.
This patch fixes cases where we start a new group although the previous one has
not ended.

Regression tested on s390x.

gcc/ChangeLog:

2017-10-11  Robin Dapp  <rdapp@linux.vnet.ibm.com>

        * config/s390/s390.c (s390_has_ok_fallthru): New function.
        (s390_sched_score): Temporarily change s390_sched_state.
        (s390_sched_variable_issue): Use s390_last_sched_state in case the
        group is not finished yet.
        (s390_sched_init): Init s390_last_sched_state.


---
 gcc/config/s390/s390.c | 44 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

Comments

Andreas Krebbel Oct. 16, 2017, 10:57 a.m. | #1
On 10/11/2017 01:53 PM, Robin Dapp wrote:
> This patch fixes cases where we start a new group although the previous one has
> not ended.
> 
> Regression tested on s390x.
> 
> gcc/ChangeLog:
> 
> 2017-10-11  Robin Dapp  <rdapp@linux.vnet.ibm.com>
> 
>         * config/s390/s390.c (s390_has_ok_fallthru): New function.
>         (s390_sched_score): Temporarily change s390_sched_state.
>         (s390_sched_variable_issue): Use s390_last_sched_state in case the
>         group is not finished yet.
>         (s390_sched_init): Init s390_last_sched_state.
> 
> 
> ---
>  gcc/config/s390/s390.c | 44 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 2411c67..7f08ba2 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -14346,6 +14346,31 @@ s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
>    ready[0] = tmp;
>  }
> 
> +static bool
> +s390_has_ok_fallthru (rtx_insn *insn)

Please add a comment describing what the function does.
Wouldn't bb be a better parameter?
Perhaps we could also find a better name for it?! Something shorter than
s390_bb_likely_entered_via_fallthru.

> +{
> +  edge e, fallthru_edge;
> +  edge_iterator ei;
> +  bool other_likely_edge = false;
> +
> +  fallthru_edge =
> +    find_fallthru_edge (BLOCK_FOR_INSN (insn)->preds);
> +  FOR_EACH_EDGE (e, ei, BLOCK_FOR_INSN (insn)->preds)
> +    {
> +      if (e != fallthru_edge
> +	  && e->probability >= profile_probability::unlikely ())
> +	{
> +	  other_likely_edge = true;
> +	  break;
> +	}
> +    }
> +
> +  if (fallthru_edge && !other_likely_edge)
> +    return true;
> +
> +  return false;
> +}

edge e, fallthru_edge;
edge_iterator ei;

fallthru_edge =
   find_fallthru_edge (BLOCK_FOR_INSN (insn)->preds);

if (!fallthru_edge)
  return false;

FOR_EACH_EDGE (e, ei, BLOCK_FOR_INSN (insn)->preds)
  if (e != fallthru_edge
      && e->probability >= profile_probability::unlikely ())
        return false;

return true;


>  /* The s390_sched_state variable tracks the state of the current or
>     the last instruction group.
> @@ -14355,6 +14380,7 @@ s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
>     4     the last group was a cracked/expanded insn */
> 
>  static int s390_sched_state;
> +static int s390_last_sched_state;
> 
>  #define S390_SCHED_STATE_NORMAL  3
>  #define S390_SCHED_STATE_CRACKED 4
> @@ -14430,7 +14456,14 @@ s390_sched_score (rtx_insn *insn)
>    unsigned int mask = s390_get_sched_attrmask (insn);
>    int score = 0;
> 
> -  switch (s390_sched_state)
> +  int temp_sched_state = s390_sched_state;
> +
> +  if (s390_sched_state < s390_last_sched_state
> +      && s390_last_sched_state < S390_SCHED_STATE_NORMAL
> +      && s390_has_ok_fallthru (insn))
> +    temp_sched_state = s390_last_sched_state;

Can't we just set s390_sched_state to s390_last_sched_state in s390_sched_init?

> +
> +  switch (temp_sched_state)
>      {
>      case 0:
>        /* Try to put insns into the first slot which would otherwise
> @@ -14656,11 +14689,15 @@ s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
>  	  switch (s390_sched_state)
>  	    {
>  	    case 0:
> +	      if (s390_last_sched_state > 0 && s390_has_ok_fallthru (insn))
> +		s390_sched_state = s390_last_sched_state;

Could be removed after setting s390_sched_state in s390_sched_init.

> +
> +	      if (s390_sched_state == 0)
> +		starts_group = true;
> +	      /* fallthrough */
>  	    case 1:
>  	    case 2:
>  	    case S390_SCHED_STATE_NORMAL:
> -	      if (s390_sched_state == 0)
> -		starts_group = true;
>  	      if (s390_sched_state == S390_SCHED_STATE_NORMAL)
>  		{
>  		  starts_group = true;
> @@ -14768,6 +14805,7 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
>  {
>    last_scheduled_insn = NULL;
>    memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
> +  s390_last_sched_state = s390_sched_state;
>    s390_sched_state = 0;

At the very first execution of s390_sched_init, s390_sched_state would be used without being
initialized first. Well, it is a static int and hence set to zero. But perhaps it makes sense to
make this explicit.

Preserving the sched state across basic blocks for your case works only if the BBs are traversed
with the fall through edges coming first. Is that the case? We probably should have a description
for s390_last_sched_state stating this.

-Andreas-
Robin Dapp Oct. 17, 2017, 1:41 p.m. | #2
> Can't we just set s390_sched_state to s390_last_sched_state in
> s390_sched_init.

Good idea, this simplifies the code quite a bit.

> Preserving the sched state across basic blocks for your case works
> only if the BBs are traversed with the fall through edges coming
> first. Is that the case? We probably should have a description for
> s390_last_sched_state stating this.

According to the documentation, the basic blocks should be traversed in
"instruction stream" order, i.e. a fallthru edge will be considered
first, as far as I understand it.

Regards
 Robin

--

gcc/ChangeLog:

2017-10-17  Robin Dapp  <rdapp@linux.vnet.ibm.com>

	* config/s390/s390.c (s390_bb_fallthru_entry_likely): New
	function.
	(s390_sched_init): Do not reset s390_sched_state if we entered
	the current basic block via a fallthru edge and all others are
	very unlikely.
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index c1a144e..dac81bc 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -83,6 +83,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "symbol-summary.h"
 #include "ipa-prop.h"
 #include "ipa-fnsummary.h"
+#include "sched-int.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -14346,6 +14347,28 @@ s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
   ready[0] = tmp;
 }
 
+/* Returns TRUE if BB is entered via a fallthru edge and all other
+   incoming edges are less than unlikely.  */
+static bool
+s390_bb_fallthru_entry_likely (basic_block bb)
+{
+  edge e, fallthru_edge;
+  edge_iterator ei;
+
+  if (!bb)
+    return false;
+
+  fallthru_edge = find_fallthru_edge (bb->preds);
+  if (!fallthru_edge)
+    return false;
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    if (e != fallthru_edge
+	&& e->probability >= profile_probability::unlikely ())
+      return false;
+
+  return true;
+}
 
 /* The s390_sched_state variable tracks the state of the current or
    the last instruction group.
@@ -14354,7 +14377,7 @@ s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
    3     the last group is complete - normal insns
    4     the last group was a cracked/expanded insn */
 
-static int s390_sched_state;
+static int s390_sched_state = 0;
 
 #define S390_SCHED_STATE_NORMAL  3
 #define S390_SCHED_STATE_CRACKED 4
@@ -14764,7 +14787,17 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
 {
   last_scheduled_insn = NULL;
   memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
-  s390_sched_state = 0;
+
+  /* If the next basic block is most likely entered via a fallthru edge
+     we keep the last sched state.  Otherwise we start a new group.
+     The scheduler traverses basic blocks in "instruction stream" ordering
+     so if we see a fallthru edge here, s390_sched_state will be of its
+     source block.  */
+  rtx_insn *insn = current_sched_info->prev_head
+    ? NEXT_INSN (current_sched_info->prev_head) : NULL;
+  basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
+  if (!s390_bb_fallthru_entry_likely (bb))
+    s390_sched_state = 0;
 }
 
 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
Robin Dapp Oct. 18, 2017, 9:19 a.m. | #3
> Preserving the sched state across basic blocks for your case works only if the BBs are traversed
> with the fall through edges coming first. Is that the case? We probably should have a description
> for s390_last_sched_state stating this.

Committed as attached with an additional comment and a check for >= z13.

Regards
 Robin
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index c1a144e..6f1e793 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -83,6 +83,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "symbol-summary.h"
 #include "ipa-prop.h"
 #include "ipa-fnsummary.h"
+#include "sched-int.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -14346,6 +14347,28 @@ s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
   ready[0] = tmp;
 }
 
+/* Returns TRUE if BB is entered via a fallthru edge and all other
+   incoming edges are less than unlikely.  */
+static bool
+s390_bb_fallthru_entry_likely (basic_block bb)
+{
+  edge e, fallthru_edge;
+  edge_iterator ei;
+
+  if (!bb)
+    return false;
+
+  fallthru_edge = find_fallthru_edge (bb->preds);
+  if (!fallthru_edge)
+    return false;
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    if (e != fallthru_edge
+	&& e->probability >= profile_probability::unlikely ())
+      return false;
+
+  return true;
+}
 
 /* The s390_sched_state variable tracks the state of the current or
    the last instruction group.
@@ -14354,7 +14377,7 @@ s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
    3     the last group is complete - normal insns
    4     the last group was a cracked/expanded insn */
 
-static int s390_sched_state;
+static int s390_sched_state = 0;
 
 #define S390_SCHED_STATE_NORMAL  3
 #define S390_SCHED_STATE_CRACKED 4
@@ -14764,7 +14787,21 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
 {
   last_scheduled_insn = NULL;
   memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
-  s390_sched_state = 0;
+
+  /* If the next basic block is most likely entered via a fallthru edge
+     we keep the last sched state.  Otherwise we start a new group.
+     The scheduler traverses basic blocks in "instruction stream" ordering
+     so if we see a fallthru edge here, s390_sched_state will be of its
+     source block.
+
+     current_sched_info->prev_head is the insn before the first insn of the
+     block of insns to be scheduled.
+     */
+  rtx_insn *insn = current_sched_info->prev_head
+    ? NEXT_INSN (current_sched_info->prev_head) : NULL;
+  basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
+  if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
+    s390_sched_state = 0;
 }
 
 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates

Patch

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 2411c67..7f08ba2 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -14346,6 +14346,31 @@  s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
   ready[0] = tmp;
 }
 
+static bool
+s390_has_ok_fallthru (rtx_insn *insn)
+{
+  edge e, fallthru_edge;
+  edge_iterator ei;
+  bool other_likely_edge = false;
+
+  fallthru_edge =
+    find_fallthru_edge (BLOCK_FOR_INSN (insn)->preds);
+  FOR_EACH_EDGE (e, ei, BLOCK_FOR_INSN (insn)->preds)
+    {
+      if (e != fallthru_edge
+	  && e->probability >= profile_probability::unlikely ())
+	{
+	  other_likely_edge = true;
+	  break;
+	}
+    }
+
+  if (fallthru_edge && !other_likely_edge)
+    return true;
+
+  return false;
+}
+
 
 /* The s390_sched_state variable tracks the state of the current or
    the last instruction group.
@@ -14355,6 +14380,7 @@  s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
    4     the last group was a cracked/expanded insn */
 
 static int s390_sched_state;
+static int s390_last_sched_state;
 
 #define S390_SCHED_STATE_NORMAL  3
 #define S390_SCHED_STATE_CRACKED 4
@@ -14430,7 +14456,14 @@  s390_sched_score (rtx_insn *insn)
   unsigned int mask = s390_get_sched_attrmask (insn);
   int score = 0;
 
-  switch (s390_sched_state)
+  int temp_sched_state = s390_sched_state;
+
+  if (s390_sched_state < s390_last_sched_state
+      && s390_last_sched_state < S390_SCHED_STATE_NORMAL
+      && s390_has_ok_fallthru (insn))
+    temp_sched_state = s390_last_sched_state;
+
+  switch (temp_sched_state)
     {
     case 0:
       /* Try to put insns into the first slot which would otherwise
@@ -14656,11 +14689,15 @@  s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
 	  switch (s390_sched_state)
 	    {
 	    case 0:
+	      if (s390_last_sched_state > 0 && s390_has_ok_fallthru (insn))
+		s390_sched_state = s390_last_sched_state;
+
+	      if (s390_sched_state == 0)
+		starts_group = true;
+	      /* fallthrough */
 	    case 1:
 	    case 2:
 	    case S390_SCHED_STATE_NORMAL:
-	      if (s390_sched_state == 0)
-		starts_group = true;
 	      if (s390_sched_state == S390_SCHED_STATE_NORMAL)
 		{
 		  starts_group = true;
@@ -14768,6 +14805,7 @@  s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
 {
   last_scheduled_insn = NULL;
   memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
+  s390_last_sched_state = s390_sched_state;
   s390_sched_state = 0;
 }