diff mbox

Fix combiner i3/i2 pattern splitting (PR rtl-optimization/45695)

Message ID 20100917143430.GC1269@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Sept. 17, 2010, 2:34 p.m. UTC
Hi!

In the PR rtl-optimization/44858 fix I've been assuming that the comment
claiming the order doesn't really matter and HAVE_cc0 implementation are
right, unfortunately it seems it was wrong.  We tested unconditionally
          && ! use_crosses_set_p (SET_SRC (XVECEXP (newpat, 0, 1)),
                                  DF_INSN_LUID (i2))
but for HAVE_cc0 could have chosen to put XVECEXP (newpat, 0, 0)
into i2 instead of i3 (then the use_crosses_set_p test is irrelevant
to it and nothing tests that the pattern we want to put into i2
doesn't depend on something set by insns in between i2 and i3.
And my patch of course made things even worse, as it tries to swap
the patterns if added clobbers disallow the initially chosen order, even
on !HAVE_cc0 targets.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok
for trunk/4.5?

2010-09-17  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/45695
	* combine.c (try_combine): When splitting a two set pattern,
	make sure the pattern which will be put into i2 doesn't use REGs
	or MEMs set by insns in between i2 and i3.

	* gcc.c-torture/execute/pr45695.c: New test.


	Jakub

Comments

Eric Botcazou Sept. 18, 2010, 3:17 p.m. UTC | #1
> In the PR rtl-optimization/44858 fix I've been assuming that the comment
> claiming the order doesn't really matter and HAVE_cc0 implementation are
> right, unfortunately it seems it was wrong.  We tested unconditionally
>           && ! use_crosses_set_p (SET_SRC (XVECEXP (newpat, 0, 1)),
>                                   DF_INSN_LUID (i2))
> but for HAVE_cc0 could have chosen to put XVECEXP (newpat, 0, 0)
> into i2 instead of i3 (then the use_crosses_set_p test is irrelevant
> to it and nothing tests that the pattern we want to put into i2
> doesn't depend on something set by insns in between i2 and i3.
> And my patch of course made things even worse, as it tries to swap
> the patterns if added clobbers disallow the initially chosen order, even
> on !HAVE_cc0 targets.

How about erring on the side of caution and trimming down the fix for 44858
to make it identical to that of 20322?  Would that pessimize too much?
Jakub Jelinek Sept. 18, 2010, 4:05 p.m. UTC | #2
On Sat, Sep 18, 2010 at 05:17:58PM +0200, Eric Botcazou wrote:
> > In the PR rtl-optimization/44858 fix I've been assuming that the comment
> > claiming the order doesn't really matter and HAVE_cc0 implementation are
> > right, unfortunately it seems it was wrong.  We tested unconditionally
> >           && ! use_crosses_set_p (SET_SRC (XVECEXP (newpat, 0, 1)),
> >                                   DF_INSN_LUID (i2))
> > but for HAVE_cc0 could have chosen to put XVECEXP (newpat, 0, 0)
> > into i2 instead of i3 (then the use_crosses_set_p test is irrelevant
> > to it and nothing tests that the pattern we want to put into i2
> > doesn't depend on something set by insns in between i2 and i3.
> > And my patch of course made things even worse, as it tries to swap
> > the patterns if added clobbers disallow the initially chosen order, even
> > on !HAVE_cc0 targets.
> 
> How about erring on the side of caution and trimming down the fix for 44858
> to make it identical to that of 20322?  Would that pessimize too much?

HAVE_cc0 targets would still be buggy.  If you want I can gather statistics
on how many cases would match with the swapped patterns compared to without
that.


	Jakub
Eric Botcazou Sept. 18, 2010, 4:23 p.m. UTC | #3
> HAVE_cc0 targets would still be buggy.

But not more than before the fix, right?

> If you want I can gather statistics on how many cases would match with the
> swapped patterns compared to without that.

Yes, that would be nice, for example on the core compiler itself at -O2, TIA.
diff mbox

Patch

--- gcc/combine.c.jj	2010-08-30 18:51:10.000000000 +0200
+++ gcc/combine.c	2010-09-17 13:17:53.240646742 +0200
@@ -3509,36 +3509,43 @@  try_combine (rtx i3, rtx i2, rtx i1, int
 	   && GET_CODE (XVECEXP (newpat, 0, 1)) == SET
 	   && GET_CODE (SET_DEST (XVECEXP (newpat, 0, 1))) != ZERO_EXTRACT
 	   && GET_CODE (SET_DEST (XVECEXP (newpat, 0, 1))) != STRICT_LOW_PART
-	   && ! use_crosses_set_p (SET_SRC (XVECEXP (newpat, 0, 1)),
-				   DF_INSN_LUID (i2))
 	   && ! reg_referenced_p (SET_DEST (XVECEXP (newpat, 0, 1)),
 				  XVECEXP (newpat, 0, 0))
 	   && ! reg_referenced_p (SET_DEST (XVECEXP (newpat, 0, 0)),
 				  XVECEXP (newpat, 0, 1))
 	   && ! (contains_muldiv (SET_SRC (XVECEXP (newpat, 0, 0)))
-		 && contains_muldiv (SET_SRC (XVECEXP (newpat, 0, 1))))
-#ifdef HAVE_cc0
-	   /* We cannot split the parallel into two sets if both sets
-	      reference cc0.  */
-	   && ! (reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 0))
-		 && reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 1)))
-#endif
-	   )
+		 && contains_muldiv (SET_SRC (XVECEXP (newpat, 0, 1)))))
     {
       /* Normally, it doesn't matter which of the two is done first,
-	 but it does if one references cc0.  In that case, it has to
+	 but the one that references cc0 can't be the second, and
+	 one which uses any regs/memory set in between i2 and i3 can't
 	 be first.  */
+      bool pat0_in_i2_ok
+	= !use_crosses_set_p (SET_SRC (XVECEXP (newpat, 0, 0)),
+			      DF_INSN_LUID (i2));
+      bool pat1_in_i2_ok
+	= !use_crosses_set_p (SET_SRC (XVECEXP (newpat, 0, 1)),
+			      DF_INSN_LUID (i2));
 #ifdef HAVE_cc0
       if (reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 0)))
+	pat1_in_i2_ok = false;
+      if (reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 1)))
+	pat0_in_i2_ok = false;
+#endif
+      if (pat1_in_i2_ok)
+	{
+	  newi2pat = XVECEXP (newpat, 0, 1);
+	  newpat = XVECEXP (newpat, 0, 0);
+	}
+      else if (pat0_in_i2_ok)
 	{
 	  newi2pat = XVECEXP (newpat, 0, 0);
 	  newpat = XVECEXP (newpat, 0, 1);
 	}
       else
-#endif
 	{
-	  newi2pat = XVECEXP (newpat, 0, 1);
-	  newpat = XVECEXP (newpat, 0, 0);
+	  undo_all ();
+	  return 0;
 	}
 
       i2_code_number = recog_for_combine (&newi2pat, i2, &new_i2_notes);
@@ -3564,13 +3571,11 @@  try_combine (rtx i3, rtx i2, rtx i1, int
 		  temp = newpat;
 		  newpat = XVECEXP (newi2pat, 0, 0);
 		  newi2pat = temp;
-#ifdef HAVE_cc0
-		  if (reg_referenced_p (cc0_rtx, newpat))
+		  if (!pat0_in_i2_ok || !pat1_in_i2_ok)
 		    {
 		      undo_all ();
 		      return 0;
 		    }
-#endif
 
 		  i2_code_number = recog_for_combine (&newi2pat, i2,
 						      &new_i2_notes);
--- gcc/testsuite/gcc.c-torture/execute/pr45695.c.jj	2010-09-17 13:47:01.393645810 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr45695.c	2010-09-17 13:46:19.000000000 +0200
@@ -0,0 +1,32 @@ 
+/* PR rtl-optimization/45695 */
+
+extern void abort (void);
+
+__attribute__((noinline)) void
+g (int x)
+{
+  asm volatile ("" : "+r" (x));
+}
+
+__attribute__((noinline)) int
+f (int a, int b, int d)
+{
+  int r = -1;
+  b += d;
+  if (d == a)
+    r = b - d;
+  g (b);
+  return r;
+}
+
+int
+main (void)
+{
+  int l;
+  asm ("" : "=r" (l) : "0" (0));
+  if (f (l + 0, l + 1, l + 4) != -1)
+    abort ();
+  if (f (l + 4, l + 1, l + 4) != 1)
+    abort ();
+  return 0;
+}