diff mbox series

[pushed] aarch64: Tighten early-ra chain test for wide registers [PR113295]

Message ID mpt34tjjmh8.fsf@arm.com
State New
Headers show
Series [pushed] aarch64: Tighten early-ra chain test for wide registers [PR113295] | expand

Commit Message

Richard Sandiford Feb. 23, 2024, 2:15 p.m. UTC
Most code in early-ra used is_chain_candidate to check whether we
should chain two allocnos.  This included both tests that matter
for correctness and tests for certain heuristics.

Once that test passes for one pair of allocnos, we test whether
it's safe to chain the containing groups (which might contain
multiple allocnos for x2, x3 and x4 modes).  This test used an
inline test for correctness only, deliberately skipping the
heuristics.  However, this instance of the test was missing
some handling of equivalent allocnos.

This patch fixes things by making is_chain_candidate take a
strictness parameter: correctness only, or correctness + heuristics.
It then makes the group-chaining test use the correctness version
rather than trying to replicate it inline.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
	PR target/113295
	* config/aarch64/aarch64-early-ra.cc
	(early_ra::test_strictness): New enum.
	(early_ra::is_chain_candidate): Add a strictness parameter to
	control whether only correctness matters, or whether both correctness
	and heuristics should be used.  Handle multiple levels of equivalence.
	(early_ra::find_related_start): Update call accordingly.
	(early_ra::strided_polarity_pref): Likewise.
	(early_ra::form_chains): Likewise.
	(early_ra::try_to_chain_allocnos): Use is_chain_candidate in
	correctness mode rather than trying to inline the test.

gcc/testsuite/
	PR target/113295
	* gcc.target/aarch64/pr113295-2.c: New test.
---
 gcc/config/aarch64/aarch64-early-ra.cc        | 48 ++++++++--------
 gcc/testsuite/gcc.target/aarch64/pr113295-2.c | 57 +++++++++++++++++++
 2 files changed, 82 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113295-2.c
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index 58ae5a49913..9ac9ec1bb0d 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -95,6 +95,10 @@  public:
   void execute ();
 
 private:
+  // Whether to test only things that are required for correctness,
+  // or whether to take optimization heuristics into account as well.
+  enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };
+
   static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
   using operand_mask = uint32_t;
 
@@ -452,7 +456,7 @@  private:
 
   template<unsigned int allocno_info::*field>
   static int cmp_increasing (const void *, const void *);
-  bool is_chain_candidate (allocno_info *, allocno_info *);
+  bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
   int rate_chain (allocno_info *, allocno_info *);
   static int cmp_chain_candidates (const void *, const void *);
   void chain_allocnos (unsigned int &, unsigned int &);
@@ -1588,7 +1592,7 @@  early_ra::find_related_start (allocno_info *dest_allocno,
 	return res;
 
       auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
-      if (!is_chain_candidate (dest_allocno, next_allocno))
+      if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
 	return res;
 
       dest_allocno = next_allocno;
@@ -2011,7 +2015,7 @@  early_ra::strided_polarity_pref (allocno_info *allocno1,
   if (allocno1->offset + 1 < allocno1->group_size
       && allocno2->offset + 1 < allocno2->group_size)
     {
-      if (is_chain_candidate (allocno1 + 1, allocno2 + 1))
+      if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
 	return 1;
       else
 	return -1;
@@ -2019,7 +2023,7 @@  early_ra::strided_polarity_pref (allocno_info *allocno1,
 
   if (allocno1->offset > 0 && allocno2->offset > 0)
     {
-      if (is_chain_candidate (allocno1 - 1, allocno2 - 1))
+      if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
 	return 1;
       else
 	return -1;
@@ -2215,38 +2219,37 @@  early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
 }
 
 // Return true if we should consider chaining ALLOCNO1 onto the head
-// of ALLOCNO2.  This is just a local test of the two allocnos; it doesn't
-// guarantee that chaining them would give a self-consistent system.
+// of ALLOCNO2.  STRICTNESS says whether we should take copy-elision
+// heuristics into account, or whether we should just consider things
+// that matter for correctness.
+//
+// This is just a local test of the two allocnos; it doesn't guarantee
+// that chaining them would give a self-consistent system.
 bool
-early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
+early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
+			      test_strictness strictness)
 {
   if (allocno2->is_shared ())
     return false;
 
-  if (allocno1->is_equiv)
+  while (allocno1->is_equiv)
     allocno1 = m_allocnos[allocno1->related_allocno];
 
   if (allocno2->start_point >= allocno1->end_point
       && !allocno2->is_equiv_to (allocno1->id))
     return false;
 
-  if (allocno2->is_strong_copy_dest)
-    {
-      if (!allocno1->is_strong_copy_src
-	  || allocno1->copy_dest != allocno2->id)
-	return false;
-    }
-  else if (allocno2->is_copy_dest)
+  if (allocno1->is_earlyclobbered
+      && allocno1->end_point == allocno2->start_point + 1)
+    return false;
+
+  if (strictness == ALL_REASONS && allocno2->is_copy_dest)
     {
       if (allocno1->copy_dest != allocno2->id)
 	return false;
-    }
-  else if (allocno1->is_earlyclobbered)
-    {
-      if (allocno1->end_point == allocno2->start_point + 1)
+      if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src)
 	return false;
     }
-
   return true;
 }
 
@@ -2470,8 +2473,7 @@  early_ra::try_to_chain_allocnos (allocno_info *allocno1,
 	  auto *head2 = m_allocnos[headi2];
 	  if (head1->chain_next != INVALID_ALLOCNO)
 	    return false;
-	  if (!head2->is_equiv_to (head1->id)
-	      && head1->end_point <= head2->start_point)
+	  if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY))
 	    return false;
 	}
     }
@@ -2620,7 +2622,7 @@  early_ra::form_chains ()
 	  auto *allocno2 = m_sorted_allocnos[sci];
 	  if (allocno2->chain_prev == INVALID_ALLOCNO)
 	    {
-	      if (!is_chain_candidate (allocno1, allocno2))
+	      if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS))
 		continue;
 	      chain_candidate_info candidate;
 	      candidate.allocno = allocno2;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113295-2.c b/gcc/testsuite/gcc.target/aarch64/pr113295-2.c
new file mode 100644
index 00000000000..6fa29bdfc05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113295-2.c
@@ -0,0 +1,57 @@ 
+// { dg-do run }
+// { dg-options "-O2" }
+
+#include <arm_neon.h>
+
+void __attribute__ ((noinline))
+foo (int8_t **ptr)
+{
+  int8x16_t v0 = vld1q_s8 (ptr[0]);
+  int8x16_t v1 = vld1q_s8 (ptr[1]);
+  int8x16_t v2 = vld1q_s8 (ptr[2]);
+  int8x16_t v3 = vld1q_s8 (ptr[3]);
+  int8x16_t v4 = vld1q_s8 (ptr[4]);
+
+  int8x16x4_t res0 = { v0, v1, v2, v3 };
+  vst4q_s8 (ptr[5], res0);
+
+  int8x16_t add = vaddq_s8 (v2, v3);
+  int8x16x3_t res1 = { v1, add, v3 };
+  vst3q_s8 (ptr[6], res1);
+
+  int8x16x3_t res2 = { v0, v1, v2 };
+  vst3q_s8 (ptr[7], res2);
+}
+
+int8_t arr0[16] = { 1 };
+int8_t arr1[16] = { 2 };
+int8_t arr2[16] = { 4 };
+int8_t arr3[16] = { 8 };
+int8_t arr4[16] = { 16 };
+int8_t arr5[16 * 4];
+int8_t arr6[16 * 3];
+int8_t arr7[16 * 3];
+int8_t *ptr[] =
+{
+  arr0,
+  arr1,
+  arr2,
+  arr3,
+  arr4,
+  arr5,
+  arr6,
+  arr7
+};
+
+int
+main (void)
+{
+  foo (ptr);
+  if (arr5[0] != 1 || arr5[1] != 2 || arr5[2] != 4 || arr5[3] != 8)
+    __builtin_abort ();
+  if (arr6[0] != 2 || arr6[1] != 12 || arr6[2] != 8)
+    __builtin_abort ();
+  if (arr7[0] != 1 || arr7[1] != 2 || arr7[2] != 4)
+    __builtin_abort ();
+  return 0;
+}