diff mbox

[SPARC] Follow-up to latest LEON3 workaround

Message ID 1429980.rs0hm5uZQe@polaris
State New
Headers show

Commit Message

Eric Botcazou March 15, 2014, 9:40 p.m. UTC
This is a follow-up to
  http://gcc.gnu.org/ml/gcc-patches/2013-07/msg00959.html
which implemented the workaround for the data cache nullify issues on LEON3.

The errata sheet wasn't crystal clear and didn't say anything about delay
slots, but it appears that they can come into play, so the attached patch
prevents integer loads from being put in delay slots when -mfix-ut699 is
enabled.

Tested on SPARC/Solaris, applied to the mainline and the 4.8 branch.


2014-03-15  Eric Botcazou  <ebotcazou@adacore.com>

	* config/sparc/sparc-protos.h (tls_call_delay): Delete.
	(eligible_for_call_delay): New prototype.
	* config/sparc/sparc.c (tls_call_delay): Rename into...
	(eligible_for_call_delay): ...this.  Return false if the instruction
	cannot be put in the delay slot of a branch.
	(eligible_for_restore_insn): Simplify.
	(eligible_for_return_delay): Return false if the instruction cannot be
	put in the delay slot of a branch and simplify.
	(eligible_for_sibcall_delay): Return false if the instruction cannot be
	put in the delay slot of a branch.
	* config/sparc/sparc.md (fix_ut699): New attribute.
	(tls_call_delay): Delete.
	(in_call_delay): Reimplement.
	(eligible_for_sibcall_delay): Rename into...
	(in_sibcall_delay): ...this.
	(eligible_for_return_delay): Rename into...
	(in_return_delay): ...this.
	(in_branch_delay): Reimplement.
	(in_uncond_branch_delay): Delete.
	(in_annul_branch_delay): Delete.
diff mbox

Patch

Index: config/sparc/sparc.md
===================================================================
--- config/sparc/sparc.md	(revision 208588)
+++ config/sparc/sparc.md	(working copy)
@@ -291,7 +291,8 @@  (define_attr "branch_type" "none,icc,fcc
   (const_string "none"))
 
 (define_attr "pic" "false,true"
-  (symbol_ref "(flag_pic != 0 ? PIC_TRUE : PIC_FALSE)"))
+  (symbol_ref "(flag_pic != 0
+		? PIC_TRUE : PIC_FALSE)"))
 
 (define_attr "calls_alloca" "false,true"
   (symbol_ref "(cfun->calls_alloca != 0
@@ -313,6 +314,10 @@  (define_attr "flat" "false,true"
   (symbol_ref "(TARGET_FLAT != 0
 		? FLAT_TRUE : FLAT_FALSE)"))
 
+(define_attr "fix_ut699" "false,true"
+   (symbol_ref "(sparc_fix_ut699 != 0
+		 ? FIX_UT699_TRUE : FIX_UT699_FALSE)"))
+
 ;; Length (in # of insns).
 ;; Beware that setting a length greater or equal to 3 for conditional branches
 ;; has a side-effect (see output_cbranch and output_v9branch).
@@ -427,32 +432,18 @@  (define_asm_attributes
   [(set_attr "length" "2")
    (set_attr "type" "multi")])
 
-;; Attributes for instruction and branch scheduling
-(define_attr "tls_call_delay" "false,true"
-  (symbol_ref "(tls_call_delay (insn)
-		? TLS_CALL_DELAY_TRUE : TLS_CALL_DELAY_FALSE)"))
-
+;; Attributes for branch scheduling
 (define_attr "in_call_delay" "false,true"
-  (cond [(eq_attr "type" "uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
-		(const_string "false")
-	 (eq_attr "type" "load,fpload,store,fpstore")
-		(if_then_else (eq_attr "length" "1")
-			      (const_string "true")
-			      (const_string "false"))]
-	 (if_then_else (and (eq_attr "length" "1")
-			    (eq_attr "tls_call_delay" "true"))
-		       (const_string "true")
-		       (const_string "false"))))
+  (symbol_ref "(eligible_for_call_delay (insn)
+		? IN_CALL_DELAY_TRUE : IN_CALL_DELAY_FALSE)"))
 
-(define_attr "eligible_for_sibcall_delay" "false,true"
+(define_attr "in_sibcall_delay" "false,true"
   (symbol_ref "(eligible_for_sibcall_delay (insn)
-		? ELIGIBLE_FOR_SIBCALL_DELAY_TRUE
-		: ELIGIBLE_FOR_SIBCALL_DELAY_FALSE)"))
+		? IN_SIBCALL_DELAY_TRUE : IN_SIBCALL_DELAY_FALSE)"))
 
-(define_attr "eligible_for_return_delay" "false,true"
+(define_attr "in_return_delay" "false,true"
   (symbol_ref "(eligible_for_return_delay (insn)
-		? ELIGIBLE_FOR_RETURN_DELAY_TRUE
-		: ELIGIBLE_FOR_RETURN_DELAY_FALSE)"))
+		? IN_RETURN_DELAY_TRUE : IN_RETURN_DELAY_FALSE)"))
 
 ;; ??? !v9: Should implement the notion of predelay slots for floating-point
 ;; branches.  This would allow us to remove the nop always inserted before
@@ -467,39 +458,28 @@  (define_attr "eligible_for_return_delay"
 ;; because it prevents us from moving back the final store of inner loops.
 
 (define_attr "in_branch_delay" "false,true"
-  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
-		     (eq_attr "length" "1"))
-		(const_string "true")
-		(const_string "false")))
-
-(define_attr "in_uncond_branch_delay" "false,true"
-  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
-		     (eq_attr "length" "1"))
-		(const_string "true")
-		(const_string "false")))
-
-(define_attr "in_annul_branch_delay" "false,true"
-  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
-		     (eq_attr "length" "1"))
-		(const_string "true")
-		(const_string "false")))
+  (cond [(eq_attr "type" "uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
+	   (const_string "false")
+	 (and (eq_attr "fix_ut699" "true") (eq_attr "type" "load,sload"))
+	   (const_string "false")
+	 (eq_attr "length" "1")
+	   (const_string "true")
+	] (const_string "false")))
 
 (define_delay (eq_attr "type" "call")
   [(eq_attr "in_call_delay" "true") (nil) (nil)])
 
 (define_delay (eq_attr "type" "sibcall")
-  [(eq_attr "eligible_for_sibcall_delay" "true") (nil) (nil)])
+  [(eq_attr "in_sibcall_delay" "true") (nil) (nil)])
+
+(define_delay (eq_attr "type" "return")
+  [(eq_attr "in_return_delay" "true") (nil) (nil)])
 
 (define_delay (eq_attr "type" "branch")
-  [(eq_attr "in_branch_delay" "true")
-   (nil) (eq_attr "in_annul_branch_delay" "true")])
+  [(eq_attr "in_branch_delay" "true") (nil) (eq_attr "in_branch_delay" "true")])
 
 (define_delay (eq_attr "type" "uncond_branch")
-  [(eq_attr "in_uncond_branch_delay" "true")
-   (nil) (nil)])
-
-(define_delay (eq_attr "type" "return")
-  [(eq_attr "eligible_for_return_delay" "true") (nil) (nil)])
+  [(eq_attr "in_branch_delay" "true") (nil) (nil)])
 
 
 ;; Include SPARC DFA schedulers
Index: config/sparc/sparc-protos.h
===================================================================
--- config/sparc/sparc-protos.h	(revision 208588)
+++ config/sparc/sparc-protos.h	(working copy)
@@ -86,9 +86,9 @@  extern int mems_ok_for_ldd_peep (rtx, rt
 extern rtx widen_mem_for_ldd_peep (rtx, rtx, enum machine_mode);
 extern int empty_delay_slot (rtx);
 extern int emit_cbcond_nop (rtx);
+extern int eligible_for_call_delay (rtx);
 extern int eligible_for_return_delay (rtx);
 extern int eligible_for_sibcall_delay (rtx);
-extern int tls_call_delay (rtx);
 extern int emit_move_sequence (rtx, enum machine_mode);
 extern int fp_sethi_p (rtx);
 extern int fp_mov_p (rtx);
Index: config/sparc/sparc.c
===================================================================
--- config/sparc/sparc.c	(revision 208588)
+++ config/sparc/sparc.c	(working copy)
@@ -3381,10 +3381,13 @@  emit_cbcond_nop (rtx insn)
 /* Return nonzero if TRIAL can go into the call delay slot.  */
 
 int
-tls_call_delay (rtx trial)
+eligible_for_call_delay (rtx trial)
 {
   rtx pat;
 
+  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
+    return 0;
+
   /* Binutils allows
        call __tls_get_addr, %tgd_call (foo)
         add %l7, %o0, %o0, %tgd_add (foo)
@@ -3466,11 +3469,7 @@  eligible_for_restore_insn (rtx trial, bo
 
   /* If we have the 'return' instruction, anything that does not use
      local or output registers and can go into a delay slot wins.  */
-  else if (return_p
-	   && TARGET_V9
-	   && !epilogue_renumber (&pat, 1)
-	   && get_attr_in_uncond_branch_delay (trial)
-	       == IN_UNCOND_BRANCH_DELAY_TRUE)
+  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
     return 1;
 
   /* The 'restore src1,src2,dest' pattern for SImode.  */
@@ -3513,21 +3512,20 @@  eligible_for_return_delay (rtx trial)
   int regno;
   rtx pat;
 
-  if (! NONJUMP_INSN_P (trial))
-    return 0;
-
-  if (get_attr_length (trial) != 1)
-    return 0;
-
   /* If the function uses __builtin_eh_return, the eh_return machinery
      occupies the delay slot.  */
   if (crtl->calls_eh_return)
     return 0;
 
+  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
+    return 0;
+
   /* In the case of a leaf or flat function, anything can go into the slot.  */
   if (sparc_leaf_function_p || TARGET_FLAT)
-    return
-      get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
+    return 1;
+
+  if (!NONJUMP_INSN_P (trial))
+    return 0;
 
   pat = PATTERN (trial);
   if (GET_CODE (pat) == PARALLEL)
@@ -3547,9 +3545,7 @@  eligible_for_return_delay (rtx trial)
 	  if (regno >= 8 && regno < 24)
 	    return 0;
 	}
-      return !epilogue_renumber (&pat, 1)
-	&& (get_attr_in_uncond_branch_delay (trial)
-	    == IN_UNCOND_BRANCH_DELAY_TRUE);
+      return !epilogue_renumber (&pat, 1);
     }
 
   if (GET_CODE (pat) != SET)
@@ -3569,10 +3565,7 @@  eligible_for_return_delay (rtx trial)
      instruction, it can probably go in.  But restore will not work
      with FP_REGS.  */
   if (! SPARC_INT_REG_P (regno))
-    return (TARGET_V9
-	    && !epilogue_renumber (&pat, 1)
-	    && get_attr_in_uncond_branch_delay (trial)
-	       == IN_UNCOND_BRANCH_DELAY_TRUE);
+    return TARGET_V9 && !epilogue_renumber (&pat, 1);
 
   return eligible_for_restore_insn (trial, true);
 }
@@ -3584,10 +3577,10 @@  eligible_for_sibcall_delay (rtx trial)
 {
   rtx pat;
 
-  if (! NONJUMP_INSN_P (trial) || GET_CODE (PATTERN (trial)) != SET)
+  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
     return 0;
 
-  if (get_attr_length (trial) != 1)
+  if (!NONJUMP_INSN_P (trial))
     return 0;
 
   pat = PATTERN (trial);
@@ -3606,6 +3599,9 @@  eligible_for_sibcall_delay (rtx trial)
       return 1;
     }
 
+  if (GET_CODE (pat) != SET)
+    return 0;
+
   /* Otherwise, only operations which can be done in tandem with
      a `restore' insn can go into the delay slot.  */
   if (GET_CODE (SET_DEST (pat)) != REG