[4/4,Aarch64] v2: Implement Aarch64 SIMD ABI

Message ID 1541699749.12016.9.camel@cavium.com
State New
Headers show
Series
  • v2: Implement Aarch64 SIMD ABI
Related show

Commit Message

Steve Ellcey Nov. 8, 2018, 5:55 p.m.
This is patch 4 of 4 to support the Aarch64 SIMD ABI [1] in GCC.

It defines a new target hook, targetm.check_part_clobbered, that
takes an rtx_insn and checks whether it is a call to a function
that may partially clobber registers.  It returns true by default,
which preserves the current behaviour, but if we can determine
that the function will not do any partial clobbers (like the
Aarch64 SIMD functions) then it returns false.

Steve Ellcey
sellcey@cavium.com



2018-11-08  Steve Ellcey  <sellcey@cavium.com>

	* config/aarch64/aarch64.c (aarch64_check_part_clobbered): New function.
	(TARGET_CHECK_PART_CLOBBERED): New macro.
	* doc/tm.texi.in (TARGET_CHECK_PART_CLOBBERED): New hook.
	* lra-constraints.c (need_for_call_save_p): Use check_part_clobbered.
	* lra-int.h (check_part_clobbered): New field in lra_reg struct.
	* lra-lives.c (check_pseudos_live_through_calls): Pass in
	check_partial_clobber bool argument and use it.
	(process_bb_lives): Check basic block for functions that may do
	partial clobbers.  Pass this to check_pseudos_live_through_calls.
	* lra.c (initialize_lra_reg_info_element): Initialize
	check_part_clobbered to false.
	* target.def (check_part_clobbered): New target hook.

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c82c7b6..c2de4111 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1480,6 +1480,17 @@  aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
   return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
 }
 
+/* Implement TARGET_CHECK_PART_CLOBBERED.  Return false if INSN is a call
+   to a SIMD function, since such calls never partially clobber registers.  */
+
+static bool
+aarch64_check_part_clobbered (rtx_insn *insn)
+{
+  if (aarch64_simd_call_p (insn))
+    return false;
+  return true;
+}
+
 /* Implement REGMODE_NATURAL_SIZE.  */
 poly_uint64
 aarch64_regmode_natural_size (machine_mode mode)
@@ -18294,6 +18305,9 @@  aarch64_libgcc_floating_mode_supported_p
 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
   aarch64_hard_regno_call_part_clobbered
 
+#undef TARGET_CHECK_PART_CLOBBERED
+#define TARGET_CHECK_PART_CLOBBERED aarch64_check_part_clobbered
+
 #undef TARGET_CONSTANT_ALIGNMENT
 #define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
 
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index e8af1bf..7dd6c54 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -1704,6 +1704,8 @@  of @code{CALL_USED_REGISTERS}.
 @cindex call-saved register
 @hook TARGET_HARD_REGNO_CALL_PART_CLOBBERED
 
+@hook TARGET_CHECK_PART_CLOBBERED
+
 @findex fixed_regs
 @findex call_used_regs
 @findex global_regs
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index ab61989..89483d3 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -5325,16 +5325,23 @@  inherit_reload_reg (bool def_p, int original_regno,
 static inline bool
 need_for_call_save_p (int regno)
 {
+  machine_mode pmode = PSEUDO_REGNO_MODE (regno);
+  int new_regno = reg_renumber[regno];
+
   lra_assert (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0);
-  return (usage_insns[regno].calls_num < calls_num
-	  && (overlaps_hard_reg_set_p
-	      ((flag_ipa_ra &&
-		! hard_reg_set_empty_p (lra_reg_info[regno].actual_call_used_reg_set))
-	       ? lra_reg_info[regno].actual_call_used_reg_set
-	       : call_used_reg_set,
-	       PSEUDO_REGNO_MODE (regno), reg_renumber[regno])
-	      || (targetm.hard_regno_call_part_clobbered
-		  (reg_renumber[regno], PSEUDO_REGNO_MODE (regno)))));
+
+  if (usage_insns[regno].calls_num >= calls_num)
+    return false;
+
+  if (flag_ipa_ra
+      && !hard_reg_set_empty_p (lra_reg_info[regno].actual_call_used_reg_set))
+    return (overlaps_hard_reg_set_p
+		(lra_reg_info[regno].actual_call_used_reg_set, pmode, new_regno)
+	    || (lra_reg_info[regno].check_part_clobbered
+		&& targetm.hard_regno_call_part_clobbered (new_regno, pmode)));
+  else
+    return (overlaps_hard_reg_set_p (call_used_reg_set, pmode, new_regno)
+            || targetm.hard_regno_call_part_clobbered (new_regno, pmode));
 }
 
 /* Global registers occurring in the current EBB.  */
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 5267b53..e6aacd2 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -117,6 +117,8 @@  struct lra_reg
   /* This member is set up in lra-lives.c for subsequent
      assignments.  */
   lra_copy_t copies;
+  /* Whether or not the register is partially clobbered.  */
+  bool check_part_clobbered;
 };
 
 /* References to the common info about each register.  */
diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c
index 0bf8cd0..b2dfe0e 100644
--- a/gcc/lra-lives.c
+++ b/gcc/lra-lives.c
@@ -597,7 +597,8 @@  lra_setup_reload_pseudo_preferenced_hard_reg (int regno,
    PSEUDOS_LIVE_THROUGH_CALLS and PSEUDOS_LIVE_THROUGH_SETJUMPS.  */
 static inline void
 check_pseudos_live_through_calls (int regno,
-				  HARD_REG_SET last_call_used_reg_set)
+				  HARD_REG_SET last_call_used_reg_set,
+				  bool check_partial_clobber)
 {
   int hr;
 
@@ -607,11 +608,12 @@  check_pseudos_live_through_calls (int regno,
   IOR_HARD_REG_SET (lra_reg_info[regno].conflict_hard_regs,
 		    last_call_used_reg_set);
 
-  for (hr = 0; hr < FIRST_PSEUDO_REGISTER; hr++)
-    if (targetm.hard_regno_call_part_clobbered (hr,
-						PSEUDO_REGNO_MODE (regno)))
-      add_to_hard_reg_set (&lra_reg_info[regno].conflict_hard_regs,
-			   PSEUDO_REGNO_MODE (regno), hr);
+  if (check_partial_clobber)
+    for (hr = 0; hr < FIRST_PSEUDO_REGISTER; hr++)
+      if (targetm.hard_regno_call_part_clobbered (hr,
+						  PSEUDO_REGNO_MODE (regno)))
+        add_to_hard_reg_set (&lra_reg_info[regno].conflict_hard_regs,
+			     PSEUDO_REGNO_MODE (regno), hr);
   lra_reg_info[regno].call_p = true;
   if (! sparseset_bit_p (pseudos_live_through_setjumps, regno))
     return;
@@ -652,6 +654,7 @@  process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
   rtx_insn *next;
   rtx link, *link_loc;
   bool need_curr_point_incr;
+  bool partial_clobber_in_bb;
   HARD_REG_SET last_call_used_reg_set;
   
   reg_live_out = df_get_live_out (bb);
@@ -673,6 +676,18 @@  process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
   if (lra_dump_file != NULL)
     fprintf (lra_dump_file, "  BB %d\n", bb->index);
 
+  /* Check to see if any call might do a partial clobber.  */
+  partial_clobber_in_bb = false;
+  FOR_BB_INSNS_REVERSE_SAFE (bb, curr_insn, next)
+    {
+      if (CALL_P (curr_insn)
+          && targetm.check_part_clobbered (curr_insn))
+        {
+          partial_clobber_in_bb = true;
+          break;
+        }
+    }
+
   /* Scan the code of this basic block, noting which pseudos and hard
      regs are born or die.
 
@@ -850,7 +865,8 @@  process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
 		|= mark_regno_live (reg->regno, reg->biggest_mode,
 				    curr_point);
 	      check_pseudos_live_through_calls (reg->regno,
-						last_call_used_reg_set);
+						last_call_used_reg_set,
+						partial_clobber_in_bb);
 	    }
 
 	  if (reg->regno >= FIRST_PSEUDO_REGISTER)
@@ -913,9 +929,14 @@  process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
 		{
 		  IOR_HARD_REG_SET (lra_reg_info[j].actual_call_used_reg_set,
 				    this_call_used_reg_set);
+
+		  if (targetm.check_part_clobbered (curr_insn))
+		    lra_reg_info[j].check_part_clobbered = true;
+
 		  if (flush)
-		    check_pseudos_live_through_calls
-		      (j, last_call_used_reg_set);
+		    check_pseudos_live_through_calls (j,
+						      last_call_used_reg_set,
+						      partial_clobber_in_bb);
 		}
 	      COPY_HARD_REG_SET(last_call_used_reg_set, this_call_used_reg_set);
 	    }
@@ -946,7 +967,8 @@  process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
 	      |= mark_regno_live (reg->regno, reg->biggest_mode,
 				  curr_point);
 	    check_pseudos_live_through_calls (reg->regno,
-					      last_call_used_reg_set);
+					      last_call_used_reg_set,
+					      partial_clobber_in_bb);
 	  }
 
       for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next)
@@ -1102,7 +1124,9 @@  process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
       if (sparseset_cardinality (pseudos_live_through_calls) == 0)
 	break;
       if (sparseset_bit_p (pseudos_live_through_calls, j))
-	check_pseudos_live_through_calls (j, last_call_used_reg_set);
+	check_pseudos_live_through_calls (j,
+					  last_call_used_reg_set,
+					  partial_clobber_in_bb);
     }
 
   for (i = 0; i < FIRST_PSEUDO_REGISTER; ++i)
diff --git a/gcc/lra.c b/gcc/lra.c
index 5d58d90..8831286 100644
--- a/gcc/lra.c
+++ b/gcc/lra.c
@@ -1344,6 +1344,7 @@  initialize_lra_reg_info_element (int i)
   lra_reg_info[i].val = get_new_reg_value ();
   lra_reg_info[i].offset = 0;
   lra_reg_info[i].copies = NULL;
+  lra_reg_info[i].check_part_clobbered = false;
 }
 
 /* Initialize common reg info and copies.  */
diff --git a/gcc/target.def b/gcc/target.def
index 4b166d1..b3c2c72 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5757,6 +5757,15 @@  for targets that don't have partly call-clobbered registers.",
  bool, (unsigned int regno, machine_mode mode),
  hook_bool_uint_mode_false)
 
+DEFHOOK
+(
+ check_part_clobbered,
+ "This hook should return true if the function @var{insn} must obey\n\
+ the hard_regno_call_part_clobbered target function.  False if can ignore\n\
+ it because we know the function will not partially clobber any registers.",
+ bool, (rtx_insn *insn),
+ hook_bool_rtx_insn_true)
+
 /* Return the smallest number of different values for which it is best to
    use a jump-table instead of a tree of conditional branches.  */
 DEFHOOK