diff mbox series

[pushed] aarch64: Add missing early-ra bookkeeping [PR113295]

Message ID mpt8r3bjmiu.fsf@arm.com
State New
Headers show
Series [pushed] aarch64: Add missing early-ra bookkeeping [PR113295] | expand

Commit Message

Richard Sandiford Feb. 23, 2024, 2:14 p.m. UTC
416.gamess showed up two wrong-code bugs in early-ra.  This patch
fixes the first of them.  It was difficult to reduce the source code
to something that would meaningfully show the situation, so the
testcase uses a direct RTL sequence instead.

In the sequence:

(a) register <2> is set more than once
(b) register <2> is copied to a temporary (<4>)
(c) register <2> is the destination of an FCSEL between <4> and
    another value (<5>)
(d) <4> and <2> are equivalent for <4>'s live range
(e) <5>'s and <2>'s live ranges do not intersect, and there is
    a pseudo-copy between <5> and <2>

On its own, (d) implies that <4> can be treated as equivalent to <2>.
And on its own, (e) implies that <5> can share <2>'s register.  But
<4>'s and <5>'s live ranges conflict, meaning that they cannot both
share the register together.  A bit of missing bookkeeping meant that
the mechanism for detecting this didn't fire.  We therefore ended up
with an FCSEL in which both inputs were the same register.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
	PR target/113295
	* config/aarch64/aarch64-early-ra.cc
	(early_ra::find_related_start): Account for definitions by shared
	registers when testing for a single register definition.
	(early_ra::accumulate_defs): New function.
	(early_ra::record_copy): If A shares B's register, fold A's
	definition information into B's.  Fold A's use information into B's.

gcc/testsuite/
	PR target/113295
	* gcc.dg/rtl/aarch64/pr113295-1.c: New test.
---
 gcc/config/aarch64/aarch64-early-ra.cc        | 27 ++++++++++
 gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c | 53 +++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index 1a03d86e94b..58ae5a49913 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -440,6 +440,7 @@  private:
   void record_allocno_use (allocno_info *);
   void record_allocno_def (allocno_info *);
   allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
+  void accumulate_defs (allocno_info *, allocno_info *);
   void record_copy (rtx, rtx, bool = false);
   void record_constraints (rtx_insn *);
   void record_artificial_refs (unsigned int);
@@ -1569,6 +1570,8 @@  early_ra::find_related_start (allocno_info *dest_allocno,
 	}
 
       if (dest_allocno->group_size != 1
+	  // Account for definitions by shared registers.
+	  || dest_allocno->num_defs > 1
 	  || DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
 	// Currently only single allocnos that are defined once can
 	// share registers with non-equivalent allocnos.  This could be
@@ -1593,6 +1596,20 @@  early_ra::find_related_start (allocno_info *dest_allocno,
     }
 }
 
+// Add FROM_ALLOCNO's definition information to TO_ALLOCNO's.
+void
+early_ra::accumulate_defs (allocno_info *to_allocno,
+			   allocno_info *from_allocno)
+{
+  if (from_allocno->num_defs > 0)
+    {
+      to_allocno->num_defs = MIN (from_allocno->num_defs
+				  + to_allocno->num_defs, 2);
+      to_allocno->last_def_point = MAX (to_allocno->last_def_point,
+					from_allocno->last_def_point);
+    }
+}
+
 // Record any relevant allocno-related information for an actual or imagined
 // copy from SRC to DEST.  FROM_MOVE_P is true if the copy was an explicit
 // move instruction, false if it represents one way of satisfying the previous
@@ -1687,6 +1704,16 @@  early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
 		  next_allocno->related_allocno = src_allocno->id;
 		  next_allocno->is_equiv = (start_allocno == dest_allocno
 					    && from_move_p);
+		  // If we're sharing two allocnos that are not equivalent,
+		  // carry across the definition information.  This is needed
+		  // to prevent multiple incompatible attempts to share with
+		  // the same register.
+		  if (next_allocno->is_shared ())
+		    accumulate_defs (src_allocno, next_allocno);
+		  src_allocno->last_use_point
+		    = MAX (src_allocno->last_use_point,
+			   next_allocno->last_use_point);
+
 		  if (next_allocno == start_allocno)
 		    break;
 		  next_allocno = m_allocnos[next_allocno->copy_dest];
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c
new file mode 100644
index 00000000000..481fb813f61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c
@@ -0,0 +1,53 @@ 
+// { dg-options "-O2" }
+// { dg-do run }
+
+struct data {
+  double x;
+  double y;
+  long long cond;
+  double res;
+};
+
+void __RTL (startwith ("early_ra")) foo (struct data *ptr)
+{
+  (function "foo"
+    (param "ptr"
+      (DECL_RTL (reg/v:DI <0> [ ptr ]))
+      (DECL_RTL_INCOMING (reg/v:DI x0 [ ptr ]))
+    ) ;; param "ptr"
+    (insn-chain
+      (block 2
+	(edge-from entry (flags "FALLTHRU"))
+	(cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+	(insn 4 (set (reg:DI <0>) (reg:DI x0)))
+	(insn 5 (set (reg:DF <1>) (mem:DF (reg:DI <0>) [1 S8 A8])))
+        (insn 6 (set (reg:DF <2>) (mem:DF (plus:DI (reg:DI <0>)
+						   (const_int 8)) [1 S8 A8])))
+        (insn 7 (set (reg:DI <3>) (mem:DI (plus:DI (reg:DI <0>)
+						   (const_int 16)) [1 S8 A8])))
+        (insn 8 (set (reg:CC cc) (compare:CC (reg:DI <3>) (const_int 0))))
+        (insn 9 (set (reg:DF <4>) (reg:DF <2>)))
+	(insn 10 (set (reg:DF <5>) (plus:DF (reg:DF <1>) (reg:DF <2>))))
+	(insn 11 (set (reg:DF <2>) (if_then_else:DF
+				     (ge (reg:CC cc) (const_int 0))
+				     (reg:DF <4>)
+				     (reg:DF <5>))))
+        (insn 12 (set (mem:DF (plus:DI (reg:DI <0>)
+				       (const_int 24)) [1 S8 A8])
+		      (reg:DF <2>)))
+	(edge-to exit (flags "FALLTHRU"))
+      ) ;; block 2
+    ) ;; insn-chain
+  ) ;; function
+}
+
+int
+main (void)
+{
+  struct data d1 = { 1, 2, -1, 0 };
+  struct data d2 = { 3, 4, 1, 0 };
+  foo (&d1);
+  foo (&d2);
+  if (d1.res != 3 || d2.res != 4)
+    __builtin_abort ();
+}