diff mbox

patch adding LRA usage for ppc

Message ID 526ADDC0.9000308@redhat.com
State New
Headers show

Commit Message

Vladimir Makarov Oct. 25, 2013, 9:08 p.m. UTC
I've committed the following patch to use LRA for ppc.  By default 
reload pass is used.  To use LRA, please add option -mlra.

   I also removed change in constraints for swap insn as it was 
requested by David Edelsohn.  It was necessary to fix one failure of new 
gcc tests.  I'll work on more safe fix for this later.

   The patch also fixes LRA crash on 32-bit SPEC2006 gamess reported by 
Mike Meissner.

   There is still LRA crash on SPEC2006 dealII in 32-bit mode with 
options used by Mike.  It is an issue with address constraints for 
TImode.  I'll work on this.  I guess address legitimize hook will solve 
the problem.  Currently LRA does not use this hook.  This hook is also 
needed to fix a LRA failures on ARM.

   Some tests for SDmode can fail for LRA as it generates a different 
code than reload.  If all toolchain is built with LRA, the failures will 
be gone.

   The patch was bootstrapped on ppc64 with LRA on/off.

   Committed as rev. 204079.

2013-10-25  Vladimir Makarov <vmakarov@redhat.com>

         * config/rs6000/rs6000-protos.h
         (rs6000_secondary_memory_needed_mode): New prototype.
         * config/rs6000/rs6000.c: Include ira.h.
         (TARGET_LRA_P): Redefine.
         (rs6000_legitimate_offset_address_p): Call
         legitimate_constant_pool_address_p in strict mode for LRA.
         (rs6000_legitimate_address_p): Ditto.
         (legitimate_lo_sum_address_p): Add code for LRA.
         Use lra_in_progress.
         (rs6000_emit_move): Add LRA version of code to generate load/store
         of SDmode values.
         (rs6000_secondary_memory_needed_mode): New.
         (rs6000_alloc_sdmode_stack_slot): Do nothing for LRA.
         (rs6000_secondary_reload_class): Return NO_REGS for LRA for
         constants, memory, and FP registers.
         (rs6000_lra_p): New.
         * config/rs6000/rs6000.h (SECONDARY_MEMORY_NEEDED_MODE): New
         macro.
         * config/rs6000/rs6000.opt (mlra): New option.
         * lra-spills.c (lra_final_code_change): Remove useless move insns.
diff mbox

Patch

Index: config/rs6000/rs6000-protos.h
===================================================================
--- config/rs6000/rs6000-protos.h	(revision 204075)
+++ config/rs6000/rs6000-protos.h	(working copy)
@@ -126,6 +126,8 @@  extern void rs6000_split_multireg_move (
 extern void rs6000_emit_le_vsx_move (rtx, rtx, enum machine_mode);
 extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
 extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
+extern enum machine_mode rs6000_secondary_memory_needed_mode (enum
+							      machine_mode);
 extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
 						    int, int, int, int *);
 extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx,
Index: config/rs6000/rs6000.c
===================================================================
--- config/rs6000/rs6000.c	(revision 204075)
+++ config/rs6000/rs6000.c	(working copy)
@@ -55,6 +55,7 @@ 
 #include "intl.h"
 #include "params.h"
 #include "tm-constrs.h"
+#include "ira.h"
 #include "opts.h"
 #include "tree-vectorizer.h"
 #include "dumpfile.h"
@@ -1554,6 +1555,9 @@  static const struct attribute_spec rs600
 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
 
+#undef TARGET_LRA_P
+#define TARGET_LRA_P rs6000_lra_p
+
 #undef TARGET_CAN_ELIMINATE
 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
 
@@ -6226,7 +6230,7 @@  rs6000_legitimate_offset_address_p (enum
     return false;
   if (!reg_offset_addressing_ok_p (mode))
     return virtual_stack_registers_memory_p (x);
-  if (legitimate_constant_pool_address_p (x, mode, strict))
+  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
     return true;
   if (GET_CODE (XEXP (x, 1)) != CONST_INT)
     return false;
@@ -6366,19 +6370,31 @@  legitimate_lo_sum_address_p (enum machin
 
   if (TARGET_ELF || TARGET_MACHO)
     {
+      bool large_toc_ok;
+
       if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
 	return false;
-      if (TARGET_TOC)
+      /* LRA don't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
+	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
+	 recognizes some LO_SUM addresses as valid although this
+	 function says opposite.  In most cases, LRA through different
+	 transformations can generate correct code for address reloads.
+	 It can not manage only some LO_SUM cases.  So we need to add
+	 code analogous to one in rs6000_legitimize_reload_address for
+	 LOW_SUM here saying that some addresses are still valid.  */
+      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
+		      && small_toc_ref (x, VOIDmode));
+      if (TARGET_TOC && ! large_toc_ok)
 	return false;
       if (GET_MODE_NUNITS (mode) != 1)
 	return false;
-      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+      if (! lra_in_progress && GET_MODE_SIZE (mode) > UNITS_PER_WORD
 	  && !(/* ??? Assume floating point reg based on mode?  */
 	       TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
 	       && (mode == DFmode || mode == DDmode)))
 	return false;
 
-      return CONSTANT_P (x);
+      return CONSTANT_P (x) || large_toc_ok;
     }
 
   return false;
@@ -7368,7 +7384,8 @@  rs6000_legitimate_address_p (enum machin
   if (reg_offset_p && legitimate_small_data_p (mode, x))
     return 1;
   if (reg_offset_p
-      && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
+      && legitimate_constant_pool_address_p (x, mode,
+					     reg_ok_strict || lra_in_progress))
     return 1;
   /* For TImode, if we have load/store quad and TImode in VSX registers, only
      allow register indirect addresses.  This will allow the values to go in
@@ -7654,6 +7671,7 @@  rs6000_conditional_register_usage (void)
 	  fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
     }
 }
+
 
 /* Try to output insns to set TARGET equal to the constant C if it can
    be done in less than N insns.  Do all computations in MODE.
@@ -8058,6 +8076,68 @@  rs6000_emit_move (rtx dest, rtx source,
     cfun->machine->sdmode_stack_slot =
       eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
 
+
+  if (lra_in_progress
+      && mode == SDmode
+      && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
+      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
+      && (REG_P (operands[1])
+	  || (GET_CODE (operands[1]) == SUBREG
+	      && REG_P (SUBREG_REG (operands[1])))))
+    {
+      int regno = REGNO (GET_CODE (operands[1]) == SUBREG
+			 ? SUBREG_REG (operands[1]) : operands[1]);
+      enum reg_class cl;
+
+      if (regno >= FIRST_PSEUDO_REGISTER)
+	{
+	  cl = reg_preferred_class (regno);
+	  gcc_assert (cl != NO_REGS);
+	  regno = ira_class_hard_regs[cl][0];
+	}
+      if (FP_REGNO_P (regno))
+	{
+	  if (GET_MODE (operands[0]) != DDmode)
+	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
+	  emit_insn (gen_movsd_store (operands[0], operands[1]));
+	}
+      else if (INT_REGNO_P (regno))
+	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+      else
+	gcc_unreachable();
+      return;
+    }
+  if (lra_in_progress
+      && mode == SDmode
+      && (REG_P (operands[0])
+	  || (GET_CODE (operands[0]) == SUBREG
+	      && REG_P (SUBREG_REG (operands[0]))))
+      && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
+      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
+    {
+      int regno = REGNO (GET_CODE (operands[0]) == SUBREG
+			 ? SUBREG_REG (operands[0]) : operands[0]);
+      enum reg_class cl;
+
+      if (regno >= FIRST_PSEUDO_REGISTER)
+	{
+	  cl = reg_preferred_class (regno);
+	  gcc_assert (cl != NO_REGS);
+	  regno = ira_class_hard_regs[cl][0];
+	}
+      if (FP_REGNO_P (regno))
+	{
+	  if (GET_MODE (operands[1]) != DDmode)
+	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
+	  emit_insn (gen_movsd_load (operands[0], operands[1]));
+	}
+      else if (INT_REGNO_P (regno))
+	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+      else
+	gcc_unreachable();
+      return;
+    }
+
   if (reload_in_progress
       && mode == SDmode
       && cfun->machine->sdmode_stack_slot != NULL_RTX
@@ -14905,6 +14985,17 @@  rs6000_secondary_memory_needed_rtx (enum
   return ret;
 }
 
+/* Return the mode to be used for memory when a secondary memory
+   location is needed.  For SDmode values we need to use DDmode, in
+   all other cases we can use the same mode.  */
+enum machine_mode
+rs6000_secondary_memory_needed_mode (enum machine_mode mode)
+{
+  if (mode == SDmode)
+    return DDmode;
+  return mode;
+}
+
 static tree
 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
 {
@@ -15798,6 +15889,10 @@  rs6000_alloc_sdmode_stack_slot (void)
   gimple_stmt_iterator gsi;
 
   gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+  /* We use a different approach for dealing with the secondary
+     memory in LRA.  */
+  if (ira_use_lra_p)
+    return;
 
   if (TARGET_NO_SDMODE_STACK)
     return;
@@ -16019,7 +16114,7 @@  rs6000_secondary_reload_class (enum reg_
   /* Constants, memory, and FP registers can go into FP registers.  */
   if ((regno == -1 || FP_REGNO_P (regno))
       && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
-    return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
+    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
 
   /* Memory, and FP/altivec registers can go into fp/altivec registers under
      VSX.  However, for scalar variables, use the traditional floating point
@@ -29364,6 +29459,13 @@  rs6000_libcall_value (enum machine_mode
 }
 
 
+/* Return true if we use LRA instead of reload pass.  */
+static bool
+rs6000_lra_p (void)
+{
+  return rs6000_lra_flag;
+}
+
 /* Given FROM and TO register numbers, say whether this elimination is allowed.
    Frame pointer elimination is automatically handled.
 
Index: config/rs6000/rs6000.h
===================================================================
--- config/rs6000/rs6000.h	(revision 204075)
+++ config/rs6000/rs6000.h	(working copy)
@@ -1488,6 +1488,13 @@  extern enum reg_class rs6000_constraints
 #define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
   rs6000_secondary_memory_needed_rtx (MODE)
 
+/* Specify the mode to be used for memory when a secondary memory
+   location is needed.  For cpus that cannot load/store SDmode values
+   from the 64-bit FP registers without using a full 64-bit
+   load/store, we need a wider mode.  */
+#define SECONDARY_MEMORY_NEEDED_MODE(MODE)		\
+  rs6000_secondary_memory_needed_mode (MODE)
+
 /* Return the maximum number of consecutive registers
    needed to represent mode MODE in a register of class CLASS.
 
Index: config/rs6000/rs6000.opt
===================================================================
--- config/rs6000/rs6000.opt	(revision 204075)
+++ config/rs6000/rs6000.opt	(working copy)
@@ -449,6 +449,10 @@  mlong-double-
 Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save
 -mlong-double-<n>	Specify size of long double (64 or 128 bits)
 
+mlra
+Target Report Var(rs6000_lra_flag) Init(0) Save
+Use LRA instead of reload
+
 msched-costly-dep=
 Target RejectNegative Joined Var(rs6000_sched_costly_dep_str)
 Determine which dependences between insns are considered costly
Index: lra-spills.c
===================================================================
--- lra-spills.c	(revision 204075)
+++ lra-spills.c	(working copy)
@@ -625,7 +625,7 @@  lra_final_code_change (void)
 {
   int i, hard_regno;
   basic_block bb;
-  rtx insn, curr;
+  rtx insn, curr, set;
   int max_regno = max_reg_num ();
 
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
@@ -661,5 +661,19 @@  lra_final_code_change (void)
 	      }
 	  if (insn_change_p)
 	    lra_update_operator_dups (id);
+
+	  if ((set = single_set (insn)) != NULL
+	      && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))
+	      && REGNO (SET_SRC (set)) == REGNO (SET_DEST (set)))
+	    {
+	      /* Remove an useless move insn.  IRA can generate move
+		 insns involving pseudos.  It is better remove them
+		 earlier to speed up compiler a bit.  It is also
+		 better to do it here as they might not pass final RTL
+		 check in LRA, (e.g. insn moving a control register
+		 into itself).  */
+	      lra_invalidate_insn_data (insn);
+	      delete_insn (insn);
+	    }
 	}
 }