Patchwork Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248)

login
register
mail settings
Submitter Jakub Jelinek
Date Nov. 1, 2010, 9:09 p.m.
Message ID <20101101210903.GD29412@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/69840/
State New
Headers show

Comments

Jakub Jelinek - Nov. 1, 2010, 9:09 p.m.
Hi!

The testcases below ICE because i0dest_in_i0src replacement of i0dest with
i0src happens multiple times within the i2pat, which sometimes leads to
self-referential rtl.
There are several issues.  One is that the earlier
newpat = subst (newpat, i0dest, i0src, ...);
might (but need not) have changed i1src, and so when i1dest
is first replaced with the i1src modified that way and then i0dest is replaced
with i0src, the replacements are already wrong and, as the testcases show,
self-referential.  A second issue is that if we are to apply more than
one substitution on i2pat and i0dest_in_i0src, then we need to pass
1 as last argument to the first subst in order to avoid unwanted
rtl sharing (which again can lead to self-referential rtl).
Another issue is that if all of i0_feeds_i2_n, i0_feeds_i1_n and
i1_feeds_i2_n are true, then we'd be substituting i0dest with i0src
in i2pat twice.

All should be fixed in the following patch, bootstrapped/regtested on
x86_64-linux and i686-linux.  Ok for trunk?

2010-11-01  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/46034
	PR rtl-optimization/46212
	PR rtl-optimization/46248
	* combine.c (try_combine): If added_sets_2 where i0dest_in_i0src
	and i0 feeds i1 and i1 feeds i2 or i0 feeds i2, make a copy of i1src
	before i0dest -> i0src substitution and pass 1 instead of 0 as last
	argument to subst on i2pat.

	* gcc.c-torture/compile/pr46034.c: New test.
	* gcc.c-torture/compile/pr46248.c: New test.
	* gcc.dg/pr46212.c: New test.


	Jakub

Patch

--- gcc/combine.c.jj	2010-11-01 09:07:24.000000000 +0100
+++ gcc/combine.c	2010-11-01 12:01:15.000000000 +0100
@@ -2502,6 +2502,7 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
   rtx i3dest_killed = 0;
   /* SET_DEST and SET_SRC of I2, I1 and I0.  */
   rtx i2dest = 0, i2src = 0, i1dest = 0, i1src = 0, i0dest = 0, i0src = 0;
+  rtx i1src_copy = 0;
   /* Set if I2DEST was reused as a scratch register.  */
   bool i2scratch = false;
   /* The PATTERNs of I0, I1, and I2, or a copy of them in certain cases.  */
@@ -3128,6 +3129,14 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
 	  return 0;
 	}
 
+      /* Following subst may modify i1src, make a copy of it
+	 before it is for added_sets_2 handling if needed.  */
+      if (added_sets_2
+	  && i0dest_in_i0src
+	  && i0_feeds_i1_n
+	  && (i1_feeds_i2_n || i0_feeds_i2_n))
+	i1src_copy = copy_rtx (i1src);
+
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i0);
       newpat = subst (newpat, i0dest, i0src, 0,
@@ -3200,11 +3209,10 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
       if (added_sets_2)
 	{
 	  rtx t = i2pat;
-	  if (i0_feeds_i2_n)
-	    t = subst (t, i0dest, i0src, 0, 0);
 	  if (i1_feeds_i2_n)
-	    t = subst (t, i1dest, i1src, 0, 0);
-	  if (i0_feeds_i1_n && i1_feeds_i2_n)
+	    t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0,
+		       i0_feeds_i1_n && i0dest_in_i0src);
+	  if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
 	    t = subst (t, i0dest, i0src, 0, 0);
 
 	  XVECEXP (newpat, 0, --total_sets) = t;
--- gcc/testsuite/gcc.c-torture/compile/pr46034.c.jj	2010-11-01 12:14:56.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr46034.c	2010-11-01 12:06:58.000000000 +0100
@@ -0,0 +1,14 @@ 
+/* PR rtl-optimization/46034 */
+
+void bar (int);
+
+void
+foo (int x, int y)
+{
+  int i;
+  for (i = 0; i < x; i++)
+    {
+      y = __builtin_abs (y);
+      bar (y / 2);
+    }
+}
--- gcc/testsuite/gcc.c-torture/compile/pr46248.c.jj	2010-11-01 12:14:59.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr46248.c	2010-11-01 12:14:11.000000000 +0100
@@ -0,0 +1,32 @@ 
+/* PR rtl-optimization/46248 */
+
+struct S
+{
+  int s;
+};
+
+void
+foo (unsigned char *x, int y, struct S *z)
+{
+  const int l1 = y;
+  const int l2 = y + l1;
+  const int l3 = y + l2;
+  const int l4 = y + l3;
+  const int l5 = y + l4;
+  const int l6 = y + l5;
+  const int l7 = y + l6;
+  int i;
+  for (i = 0; i < 8; i++)
+    {
+      int a = x[l3] - x[l4];
+      int b = x[l4] - x[l5];
+      int c = x[l5] - x[l6];
+      int d = (b >= 0 ? b : -b) - (((a >= 0 ? a : -a) + (c >= 0 ? c : -c)) >> 1);
+      if (d < z->s * 2)
+	{
+	  int v = d * (-b > 0 ? 1 : -1);
+	  x[l2] += v >> 3;
+	  x[l7] -= v >> 3;
+	}
+    }
+}
--- gcc/testsuite/gcc.dg/pr46212.c.jj	2010-11-01 12:15:14.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr46212.c	2010-11-01 12:17:55.000000000 +0100
@@ -0,0 +1,23 @@ 
+/* PR rtl-optimization/46212 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops" } */
+/* { dg-options "-O3 -funroll-loops -march=i386" { target { { i686-*-* x86_64-*-* } && ilp32 } } } */
+
+static inline unsigned
+foo (void *x)
+{
+  unsigned y = *(volatile unsigned *) (x);
+  return (y >> 24) | ((y >> 8) & 0xff00) | ((y & 0xff00) << 8) | (y << 24);
+}
+
+void
+bar (void *x, void *y, int z)
+{
+  unsigned c;
+  while (z--)
+    {
+      c = foo (y);
+      *(unsigned *) x = (c & 0xf80000) >> 9 | (c & 0xf800) >> 6
+			| (c & 0xf8) >> 3 | (c & 0x80000000) >> 16;
+    }
+}