Patchwork Fill more delay slots in conditional returns

login
register
mail settings
Submitter Steven Bosscher
Date April 3, 2013, 8:20 p.m.
Message ID <CABu31nP336+06GqbNEuv7zK+VdO8E9xW1K9+AWZPN8ShaWh76w@mail.gmail.com>
Download mbox | patch
Permalink /patch/233582/
State New
Headers show

Comments

Steven Bosscher - April 3, 2013, 8:20 p.m.
On Tue, Apr 2, 2013 at 6:56 PM, Steven Bosscher wrote:
> The SPARC back
> end also calls dbr_schedule() in its machine_reorg pass to work around
> errata in Atmel AT697F chips (LEON2-like, i.e. fairly new, see
> r179921).

Thinking about this some more: This could be fixed by inserting a
machine-specific pass just after delayed-branch scheduling, like in
the attached patch. I think the same is possible with the dbr_schedule
call in the MIPS backend.

Eric, what do you think of this approach?

With those two dbr_schedule calls out of the way, it will be a lot
easier to change things such that pass_free_cfg can run after
pass_machine_reorg (and after pass_cleanup_barriers that can be
simplified if there's still a CFG around). It will also help make the
DELAY_SLOTS hack in cfgrtl.c:rest_of_pass_free_cfg redundant.

I'm unsure how to test this patch. It's going through a bootstrap&test
cycle on sparc64-unknown-linux-gnu at the moment (on gcc62) but there
are no test cases for the errata that are being worked around...

Ciao!
Steven
* config/sparc/sparc.c: Include tree-pass.h.
	(TARGET_MACHINE_DEPENDENT_REORG): Do not redefine.
	(sparc_reorg): Rename to sparc_do_work_around_errata.  Move to
	head of file.  Change return type.  Split off gate function.
	(sparc_gate_work_around_errata): New function.
	(pass_work_around_errata): New pass definition.
	(insert_pass_work_around_errata) New pass insert definition to
	insert pass_work_around_errata just after delayed-branch scheduling.
	(sparc_option_override): Insert the pass.
Eric Botcazou - April 5, 2013, 8:22 a.m.
> Thinking about this some more: This could be fixed by inserting a
> machine-specific pass just after delayed-branch scheduling, like in
> the attached patch. I think the same is possible with the dbr_schedule
> call in the MIPS backend.
> 
> Eric, what do you think of this approach?

No objections on principle from a SPARC viewpoint.  But can we really control 
when the pass is run with register_pass?  Because it needs to be run _before_ 
branch shortening.

> With those two dbr_schedule calls out of the way, it will be a lot
> easier to change things such that pass_free_cfg can run after
> pass_machine_reorg (and after pass_cleanup_barriers that can be
> simplified if there's still a CFG around). It will also help make the
> DELAY_SLOTS hack in cfgrtl.c:rest_of_pass_free_cfg redundant.

I agree that we should get rid of MIPS/SPARC's dbr_schedule shuffling.
Steven Bosscher - April 5, 2013, 4:16 p.m.
On Fri, Apr 5, 2013 at 10:22 AM, Eric Botcazou wrote:
>> Thinking about this some more: This could be fixed by inserting a
>> machine-specific pass just after delayed-branch scheduling, like in
>> the attached patch. I think the same is possible with the dbr_schedule
>> call in the MIPS backend.
>>
>> Eric, what do you think of this approach?
>
> No objections on principle from a SPARC viewpoint.  But can we really control
> when the pass is run with register_pass?  Because it needs to be run _before_
> branch shortening.

Yes, we can control that. In this particular case, register_pass() is
told to insert the pass immediately after the first (and only)
instance of the pass named "dbr" i.e. pass_delay_slots.
position_pass() will look through the pass list and performs the
insertion in the right place. The new pass is inserted between
pass_delay_slots and pass_split_for_shorten_branches.

Without the inserted pass, before the patch, the pass chain looks like this:

          NEXT_PASS (pass_machine_reorg);
          // sparc_reorg calls cleanup_barriers and dbr_schedule
          NEXT_PASS (pass_cleanup_barriers);
          NEXT_PASS (pass_delay_slots);
          NEXT_PASS (pass_split_for_shorten_branches);
          NEXT_PASS (pass_convert_to_eh_region_ranges);
          NEXT_PASS (pass_shorten_branches);

After the patch, it looks like this:

          NEXT_PASS (pass_machine_reorg); // now a NOP for sparc
          NEXT_PASS (pass_cleanup_barriers);
          NEXT_PASS (pass_delay_slots);
          NEXT_PASS (pass_work_around_errata);
          NEXT_PASS (pass_split_for_shorten_branches);
          NEXT_PASS (pass_convert_to_eh_region_ranges);
          NEXT_PASS (pass_shorten_branches);

I've confirmed this also by printing the name for each pass in the chain.

Ciao!
Steven

Patch

Index: config/sparc/sparc.c
===================================================================
--- config/sparc/sparc.c	(revision 197452)
+++ config/sparc/sparc.c	(working copy)
@@ -52,6 +52,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "df.h"
 #include "opts.h"
+#include "tree-pass.h"
 
 /* Processor costs */
 
@@ -538,7 +539,6 @@  static void sparc_output_mi_thunk (FILE *, tree, H
 				   HOST_WIDE_INT, tree);
 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
 				       HOST_WIDE_INT, const_tree);
-static void sparc_reorg (void);
 static struct machine_function * sparc_init_machine_status (void);
 static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
 static rtx sparc_tls_get_addr (void);
@@ -680,9 +680,6 @@  char sparc_hard_reg_printed[8];
 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
 
-#undef TARGET_MACHINE_DEPENDENT_REORG
-#define TARGET_MACHINE_DEPENDENT_REORG sparc_reorg
-
 #undef TARGET_RTX_COSTS
 #define TARGET_RTX_COSTS sparc_rtx_costs
 #undef TARGET_ADDRESS_COST
@@ -804,6 +801,136 @@  char sparc_hard_reg_printed[8];
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
+/* We use the machine specific reorg pass to enable workarounds for errata.
+   We need to have the (essentially) final form of the insn stream in order
+   to properly detect the various hazards.  Therefore, this machine specific
+   pass runs as late as possible.  The pass is inserted in the pass pipeline
+   at the end of sparc_options_override().  */
+
+static bool
+sparc_gate_work_arround_errata (void)
+{
+  /* The only erratum we handle for now is that of the AT697F processor.  */
+  return sparc_fix_at697f != 0;
+}
+
+static unsigned int
+sparc_do_work_around_errata (void)
+{
+  rtx insn, next;
+
+  /* Now look for specific patterns in the insn stream.  */
+  for (insn = get_insns (); insn; insn = next)
+    {
+      bool insert_nop = false;
+      rtx set;
+
+      /* Look for a single-word load into an odd-numbered FP register.  */
+      if (NONJUMP_INSN_P (insn)
+	  && (set = single_set (insn)) != NULL_RTX
+	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
+	  && MEM_P (SET_SRC (set))
+	  && REG_P (SET_DEST (set))
+	  && REGNO (SET_DEST (set)) > 31
+	  && REGNO (SET_DEST (set)) % 2 != 0)
+	{
+	  /* The wrong dependency is on the enclosing double register.  */
+	  unsigned int x = REGNO (SET_DEST (set)) - 1;
+	  unsigned int src1, src2, dest;
+	  int code;
+
+	  /* If the insn has a delay slot, then it cannot be problematic.  */
+	  next = next_active_insn (insn);
+	  if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
+	    code = -1;
+	  else
+	    {
+	      extract_insn (next);
+	      code = INSN_CODE (next);
+	    }
+
+	  switch (code)
+	    {
+	    case CODE_FOR_adddf3:
+	    case CODE_FOR_subdf3:
+	    case CODE_FOR_muldf3:
+	    case CODE_FOR_divdf3:
+	      dest = REGNO (recog_data.operand[0]);
+	      src1 = REGNO (recog_data.operand[1]);
+	      src2 = REGNO (recog_data.operand[2]);
+	      if (src1 != src2)
+		{
+		  /* Case [1-4]:
+				 ld [address], %fx+1
+				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
+		  if ((src1 == x || src2 == x)
+		      && (dest == src1 || dest == src2))
+		    insert_nop = true;
+		}
+	      else
+		{
+		  /* Case 5:
+			     ld [address], %fx+1
+			     FPOPd %fx, %fx, %fx  */
+		  if (src1 == x
+		      && dest == src1
+		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
+		    insert_nop = true;
+		}
+	      break;
+
+	    case CODE_FOR_sqrtdf2:
+	      dest = REGNO (recog_data.operand[0]);
+	      src1 = REGNO (recog_data.operand[1]);
+	      /* Case 6:
+			 ld [address], %fx+1
+			 fsqrtd %fx, %fx  */
+	      if (src1 == x && dest == src1)
+		insert_nop = true;
+	      break;
+
+	    default:
+	      break;
+	    }
+	}
+      else
+	next = NEXT_INSN (insn);
+
+      if (insert_nop)
+	emit_insn_after (gen_nop (), insn);
+    }
+  return 0;
+}
+
+struct rtl_opt_pass pass_work_around_errata =
+{
+ {
+  RTL_PASS,
+  "errata",				/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  sparc_gate_work_around_errata,	/* gate */
+  sparc_do_work_around_errata,		/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_MACH_DEP,				/* tv_id */
+  0,					/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_verify_rtl_sharing		/* todo_flags_finish */
+ }
+};
+
+struct register_pass_info insert_pass_work_around_errata =
+{
+  &pass_work_around_errata,	/* pass */
+  "dbr",			/* reference_pass_name */
+  1,				/* ref_pass_instance_number */
+  PASS_POS_INSERT_AFTER		/* po_op */
+};
+
+/* Helpers for TARGET_DEBUG_OPTIONS.  */
 static void
 dump_target_flag_bits (const int flags)
 {
@@ -1241,6 +1368,13 @@  sparc_option_override (void)
      pessimizes for double floating-point registers.  */
   if (!global_options_set.x_flag_ira_share_save_slots)
     flag_ira_share_save_slots = 0;
+
+  /* We register a machine specific pass to work around errata, if any.
+     The pass mut be scheduled as late as possible so that we have the
+     (essentially) final form of the insn stream to work on.
+     Registering the pass must be done at start up.  It's convenient to
+     do it here.  */
+  register_pass (&insert_pass_work_around_errata);
 }
 
 /* Miscellaneous utilities.  */
@@ -10894,107 +11028,6 @@  sparc_can_output_mi_thunk (const_tree thunk_fndecl
   return (vcall_offset >= -32768 || ! fixed_regs[5]);
 }
 
-/* We use the machine specific reorg pass to enable workarounds for errata.  */
-
-static void
-sparc_reorg (void)
-{
-  rtx insn, next;
-
-  /* The only erratum we handle for now is that of the AT697F processor.  */
-  if (!sparc_fix_at697f)
-    return;
-
-  /* We need to have the (essentially) final form of the insn stream in order
-     to properly detect the various hazards.  Run delay slot scheduling.  */
-  if (optimize > 0 && flag_delayed_branch)
-    {
-      cleanup_barriers ();
-      dbr_schedule (get_insns ());
-    }
-
-  /* Now look for specific patterns in the insn stream.  */
-  for (insn = get_insns (); insn; insn = next)
-    {
-      bool insert_nop = false;
-      rtx set;
-
-      /* Look for a single-word load into an odd-numbered FP register.  */
-      if (NONJUMP_INSN_P (insn)
-	  && (set = single_set (insn)) != NULL_RTX
-	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
-	  && MEM_P (SET_SRC (set))
-	  && REG_P (SET_DEST (set))
-	  && REGNO (SET_DEST (set)) > 31
-	  && REGNO (SET_DEST (set)) % 2 != 0)
-	{
-	  /* The wrong dependency is on the enclosing double register.  */
-	  unsigned int x = REGNO (SET_DEST (set)) - 1;
-	  unsigned int src1, src2, dest;
-	  int code;
-
-	  /* If the insn has a delay slot, then it cannot be problematic.  */
-	  next = next_active_insn (insn);
-	  if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
-	    code = -1;
-	  else
-	    {
-	      extract_insn (next);
-	      code = INSN_CODE (next);
-	    }
-
-	  switch (code)
-	    {
-	    case CODE_FOR_adddf3:
-	    case CODE_FOR_subdf3:
-	    case CODE_FOR_muldf3:
-	    case CODE_FOR_divdf3:
-	      dest = REGNO (recog_data.operand[0]);
-	      src1 = REGNO (recog_data.operand[1]);
-	      src2 = REGNO (recog_data.operand[2]);
-	      if (src1 != src2)
-		{
-		  /* Case [1-4]:
-				 ld [address], %fx+1
-				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
-		  if ((src1 == x || src2 == x)
-		      && (dest == src1 || dest == src2))
-		    insert_nop = true;
-		}
-	      else
-		{
-		  /* Case 5:
-			     ld [address], %fx+1
-			     FPOPd %fx, %fx, %fx  */
-		  if (src1 == x
-		      && dest == src1
-		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
-		    insert_nop = true;
-		}
-	      break;
-
-	    case CODE_FOR_sqrtdf2:
-	      dest = REGNO (recog_data.operand[0]);
-	      src1 = REGNO (recog_data.operand[1]);
-	      /* Case 6:
-			 ld [address], %fx+1
-			 fsqrtd %fx, %fx  */
-	      if (src1 == x && dest == src1)
-		insert_nop = true;
-	      break;
-
-	    default:
-	      break;
-	    }
-	}
-      else
-	next = NEXT_INSN (insn);
-
-      if (insert_nop)
-	emit_insn_after (gen_nop (), insn);
-    }
-}
-
 /* How to allocate a 'struct machine_function'.  */
 
 static struct machine_function *