[6/6,ARC] Handle store cacheline hazard.

Message ID 20181010080016.12317-7-claziss@gmail.com
State New
Headers show
Series
  • ARC updates
Related show

Commit Message

Claudiu Zissulescu Oct. 10, 2018, 8 a.m.
Handle the store cacheline hazard on A700 CPUs by inserting two NOP_S
between the stores and the load of an ST ST LD sequence, or of its
logical equivalent (like ST ST NOP_S NOP_S J_L.D LD).

gcc/
2016-08-01  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value.
	* config/arc/arc.c (arc_active_insn): New function.
	(check_store_cacheline_hazard): Likewise.
	(workaround_arc_anomaly): Use check_store_cacheline_hazard.
	(arc_override_options): Disable delay slot scheduler for older
	A7.
	(arc_store_addr_hazard_p): New implementation, old one renamed to
	...
	(arc_store_addr_hazard_internal_p): Renamed.
	(arc_reorg): Don't combine into brcc instructions which are part
	of hardware hazard solution.
	* config/arc/arc.md (attr tune): Consider new arc7xx tune value.
	(tune_arc700): Likewise.
	* config/arc/arc.opt (arc7xx): New tune value.
	* config/arc/arc700.md: Improve A7 scheduler.
---
 gcc/config/arc/arc-arch.h |   1 +
 gcc/config/arc/arc.c      | 142 ++++++++++++++++++++++++++++++++------
 gcc/config/arc/arc.md     |   8 ++-
 gcc/config/arc/arc.opt    |   3 +
 gcc/config/arc/arc700.md  |  18 +----
 5 files changed, 132 insertions(+), 40 deletions(-)

Patch

diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h
index 859af0684b8..ad540607e55 100644
--- a/gcc/config/arc/arc-arch.h
+++ b/gcc/config/arc/arc-arch.h
@@ -71,6 +71,7 @@  enum arc_tune_attr
   {
     ARC_TUNE_NONE,
     ARC_TUNE_ARC600,
+    ARC_TUNE_ARC7XX,
     ARC_TUNE_ARC700_4_2_STD,
     ARC_TUNE_ARC700_4_2_XMAC,
     ARC_TUNE_CORE_3,
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index ab7735d6b38..90454928379 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -1308,6 +1308,10 @@  arc_override_options (void)
   if (TARGET_LONG_CALLS_SET)
     target_flags &= ~MASK_MILLICODE_THUNK_SET;
 
+  /* A7 has an issue with delay slots.  */
+  if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
+    flag_delayed_branch = 0;
+
   /* These need to be done at start up.  It's convenient to do them here.  */
   arc_init ();
 }
@@ -7529,11 +7533,91 @@  arc_invalid_within_doloop (const rtx_insn *insn)
   return NULL;
 }
 
+static rtx_insn *
+arc_active_insn (rtx_insn *insn)
+{
+  rtx_insn *nxt = next_active_insn (insn);
+
+  if (nxt && GET_CODE (PATTERN (nxt)) == ASM_INPUT)
+    nxt = next_active_insn (nxt);
+  return nxt;
+}
+
+/* Search for a sequence made out of two stores and a given number of
+   loads, insert a nop if required.  */
+
+static void
+check_store_cacheline_hazard (void)
+{
+  rtx_insn *insn, *succ0, *insn1;
+  bool found = false;
+
+  for (insn = get_insns (); insn; insn = arc_active_insn (insn))
+    {
+      succ0 = arc_active_insn (insn);
+
+      if (!succ0)
+	return;
+
+      if (!single_set (insn) || !single_set (succ0))
+	continue;
+
+      if ((get_attr_type (insn) != TYPE_STORE)
+	  || (get_attr_type (succ0) != TYPE_STORE))
+	continue;
+
+      /* Found at least two consecutive stores.  Go to the end of the
+	 store sequence.  */
+      for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1))
+	if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE)
+	  break;
+
+      /* Now, check the next two instructions for the following cases:
+         1. next instruction is a LD => insert 2 nops between store
+	    sequence and load.
+	 2. next-next instruction is a LD => insert 1 nop after the store
+	    sequence.  */
+      if (insn1 && single_set (insn1)
+	  && (get_attr_type (insn1) == TYPE_LOAD))
+	{
+	  found = true;
+	  emit_insn_before (gen_nopv (), insn1);
+	  emit_insn_before (gen_nopv (), insn1);
+	}
+      else
+	{
+	  if (insn1 && (get_attr_type (insn1) == TYPE_COMPARE))
+	    {
+	      /* REG_SAVE_NOTE is used by the Haifa scheduler; we are
+		 in reorg, so it is safe to reuse it to prevent the
+		 current compare insn from becoming part of a BRcc
+		 optimization.  */
+	      add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3));
+	    }
+	  insn1 = arc_active_insn (insn1);
+	  if (insn1 && single_set (insn1)
+	      && (get_attr_type (insn1) == TYPE_LOAD))
+	    {
+	      found = true;
+	      emit_insn_before (gen_nopv (), insn1);
+	    }
+	}
+
+      insn = insn1;
+      if (found)
+	{
+	  /* warning (0, "Potential lockup sequence found, patching"); */
+	  found = false;
+	}
+    }
+}
+
 /* Return true if a load instruction (CONSUMER) uses the same address as a
    store instruction (PRODUCER).  This function is used to avoid st/ld
    address hazard in ARC700 cores.  */
-bool
-arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
+
+static bool
+arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer)
 {
   rtx in_set, out_set;
   rtx out_addr, in_addr;
@@ -7581,6 +7665,14 @@  arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
   return false;
 }
 
+bool
+arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
+{
+  if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
+    return true;
+  return arc_store_addr_hazard_internal_p (producer, consumer);
+}
+
 /* The same functionality as arc_hazard.  It is called in machine
    reorg before any other optimization.  Hence, the NOP size is taken
    into account when doing branch shortening.  */
@@ -7589,6 +7681,7 @@  static void
 workaround_arc_anomaly (void)
 {
   rtx_insn *insn, *succ0;
+  rtx_insn *succ1;
 
   /* For any architecture: call arc_hazard here.  */
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
@@ -7600,27 +7693,30 @@  workaround_arc_anomaly (void)
 	}
     }
 
-  if (TARGET_ARC700)
-    {
-      rtx_insn *succ1;
+  if (!TARGET_ARC700)
+    return;
 
-      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
-	{
-	  succ0 = next_real_insn (insn);
-	  if (arc_store_addr_hazard_p (insn, succ0))
-	    {
-	      emit_insn_after (gen_nopv (), insn);
-	      emit_insn_after (gen_nopv (), insn);
-	      continue;
-	    }
+  /* Older A7 cores suffer from a cache hazard, so we need to insert
+     two nops between any sequence of stores and a load.  */
+  if (arc_tune != ARC_TUNE_ARC7XX)
+    check_store_cacheline_hazard ();
 
-	  /* Avoid adding nops if the instruction between the ST and LD is
-	     a call or jump.  */
-	  succ1 = next_real_insn (succ0);
-	  if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
-	      && arc_store_addr_hazard_p (insn, succ1))
-	    emit_insn_after (gen_nopv (), insn);
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      succ0 = next_real_insn (insn);
+      if (arc_store_addr_hazard_internal_p (insn, succ0))
+	{
+	  emit_insn_after (gen_nopv (), insn);
+	  emit_insn_after (gen_nopv (), insn);
+	  continue;
 	}
+
+      /* Avoid adding nops if the instruction between the ST and LD is
+	 a call or jump.  */
+      succ1 = next_real_insn (succ0);
+      if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
+	  && arc_store_addr_hazard_internal_p (insn, succ1))
+	emit_insn_after (gen_nopv (), insn);
     }
 }
 
@@ -8291,11 +8387,15 @@  arc_reorg (void)
 	      if (!link_insn)
 		continue;
 	      else
-		/* Check if this is a data dependency.  */
 		{
+		  /* Check if this is a data dependency.  */
 		  rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
 		  rtx cmp0, cmp1;
 
+		  /* Make sure we can use it for brcc insns.  */
+		  if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3)))
+		    continue;
+
 		  /* Ok this is the set cc. copy args here.  */
 		  op = XEXP (pc_target, 0);
 
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index fb8a1c9ee09..caf7deda505 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -600,11 +600,13 @@ 
 ;;   somehow modify them to become inelegible for delay slots if a decision
 ;;   is made that makes conditional execution required.
 
-(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \
-archs4x, archs4xd, archs4xd_slow"
+(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \
+core_3, archs4x, archs4xd, archs4xd_slow"
   (const
    (cond [(symbol_ref "arc_tune == TUNE_ARC600")
 	  (const_string "arc600")
+	  (symbol_ref "arc_tune == ARC_TUNE_ARC7XX")
+	  (const_string "arc7xx")
 	  (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD")
 	  (const_string "arc700_4_2_std")
 	  (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
@@ -619,7 +621,7 @@  archs4x, archs4xd, archs4xd_slow"
 	 (const_string "none"))))
 
 (define_attr "tune_arc700" "false,true"
-  (if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac")
+  (if_then_else (eq_attr "tune" "arc7xx, arc700_4_2_std, arc700_4_2_xmac")
 		(const_string "true")
 		(const_string "false")))
 
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 93e18af1d27..bcffb2720ba 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -262,6 +262,9 @@  Enum(arc_tune_attr) String(arc600) Value(ARC_TUNE_ARC600)
 EnumValue
 Enum(arc_tune_attr) String(arc601) Value(ARC_TUNE_ARC600)
 
+EnumValue
+Enum(arc_tune_attr) String(arc7xx) Value(ARC_TUNE_ARC7XX)
+
 EnumValue
 Enum(arc_tune_attr) String(arc700) Value(ARC_TUNE_ARC700_4_2_STD)
 
diff --git a/gcc/config/arc/arc700.md b/gcc/config/arc/arc700.md
index a0f9f74a9f2..cbb868d8dcd 100644
--- a/gcc/config/arc/arc700.md
+++ b/gcc/config/arc/arc700.md
@@ -145,28 +145,14 @@ 
 ; no functional unit runs when blockage is reserved
 (exclusion_set "blockage" "core, multiplier")
 
-(define_insn_reservation "data_load_DI" 4
-  (and (eq_attr "tune_arc700" "true")
-       (eq_attr "type" "load")
-       (match_operand:DI 0 "" ""))
-  "issue+dmp, issue+dmp, dmp_write_port, dmp_write_port")
-
 (define_insn_reservation "data_load" 3
   (and (eq_attr "tune_arc700" "true")
-       (eq_attr "type" "load")
-       (not (match_operand:DI 0 "" "")))
+       (eq_attr "type" "load"))
   "issue+dmp, nothing, dmp_write_port")
 
-(define_insn_reservation "data_store_DI" 2
-  (and (eq_attr "tune_arc700" "true")
-       (eq_attr "type" "store")
-       (match_operand:DI 0 "" ""))
-  "issue+dmp_write_port, issue+dmp_write_port")
-
 (define_insn_reservation "data_store" 1
   (and (eq_attr "tune_arc700" "true")
-       (eq_attr "type" "store")
-       (not (match_operand:DI 0 "" "")))
+       (eq_attr "type" "store"))
   "issue+dmp_write_port")
 
 (define_bypass 3 "data_store" "data_load" "arc_store_addr_hazard_p")