Patchwork Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248)

login
register
mail settings
Submitter Jakub Jelinek
Date Nov. 3, 2010, 8:31 a.m.
Message ID <20101103083129.GP29412@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/69946/
State New
Headers show

Comments

Jakub Jelinek - Nov. 3, 2010, 8:31 a.m.
On Tue, Nov 02, 2010 at 10:01:32PM +0100, Eric Botcazou wrote:
> > I think you are right, if !i1_feeds_i2_n then the copy is not needed,
> > because it will not be used.  If !i0dest_in_i0src, I think
> > the copy is not strictly needed, because it shouldn't matter whether
> > i0dest is replaced with i0src just once or more than once, but if you
> > prefer the if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
> > condition, I can bootstrap/regtest it with that.
> 
> Yes, I think this is easier to understand that way.  OK with this change if it 
> successfully completes the testing cycle.

Here is what I have committed after bootstrap/regtest.  Thanks.

2010-11-03  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/46034
	PR rtl-optimization/46212
	PR rtl-optimization/46248
	* combine.c (try_combine): If added_sets_2 where i0dest_in_i0src
	and i0 feeds i1 and i1 feeds i2 or i0 feeds i2, make a copy of i1src
	before i0dest -> i0src substitution and pass 1 instead of 0 as last
	argument to subst on i2pat.

	* gcc.c-torture/compile/pr46034.c: New test.
	* gcc.c-torture/compile/pr46248.c: New test.
	* gcc.dg/pr46212.c: New test.


	Jakub

Patch

--- gcc/combine.c.jj	2010-11-01 09:07:24.000000000 +0100
+++ gcc/combine.c	2010-11-01 12:01:15.000000000 +0100
@@ -2502,6 +2502,7 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
   rtx i3dest_killed = 0;
   /* SET_DEST and SET_SRC of I2, I1 and I0.  */
   rtx i2dest = 0, i2src = 0, i1dest = 0, i1src = 0, i0dest = 0, i0src = 0;
+  rtx i1src_copy = 0;
   /* Set if I2DEST was reused as a scratch register.  */
   bool i2scratch = false;
   /* The PATTERNs of I0, I1, and I2, or a copy of them in certain cases.  */
@@ -3128,6 +3129,11 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
 	  return 0;
 	}
 
+      /* The following subst may modify i1src; make a copy of it
+	 beforehand for the added_sets_2 handling, if needed.  */
+      if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
+	i1src_copy = copy_rtx (i1src);
+
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i0);
       newpat = subst (newpat, i0dest, i0src, 0,
@@ -3200,11 +3206,10 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
       if (added_sets_2)
 	{
 	  rtx t = i2pat;
-	  if (i0_feeds_i2_n)
-	    t = subst (t, i0dest, i0src, 0, 0);
 	  if (i1_feeds_i2_n)
-	    t = subst (t, i1dest, i1src, 0, 0);
-	  if (i0_feeds_i1_n && i1_feeds_i2_n)
+	    t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0,
+		       i0_feeds_i1_n && i0dest_in_i0src);
+	  if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
 	    t = subst (t, i0dest, i0src, 0, 0);
 
 	  XVECEXP (newpat, 0, --total_sets) = t;
--- gcc/testsuite/gcc.c-torture/compile/pr46034.c.jj	2010-11-01 12:14:56.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr46034.c	2010-11-01 12:06:58.000000000 +0100
@@ -0,0 +1,14 @@ 
+/* PR rtl-optimization/46034 */
+
+void bar (int);
+
+void
+foo (int x, int y)
+{
+  int i;
+  for (i = 0; i < x; i++)
+    {
+      y = __builtin_abs (y);
+      bar (y / 2);
+    }
+}
--- gcc/testsuite/gcc.c-torture/compile/pr46248.c.jj	2010-11-01 12:14:59.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr46248.c	2010-11-01 12:14:11.000000000 +0100
@@ -0,0 +1,32 @@ 
+/* PR rtl-optimization/46248 */
+
+struct S
+{
+  int s;
+};
+
+void
+foo (unsigned char *x, int y, struct S *z)
+{
+  const int l1 = y;
+  const int l2 = y + l1;
+  const int l3 = y + l2;
+  const int l4 = y + l3;
+  const int l5 = y + l4;
+  const int l6 = y + l5;
+  const int l7 = y + l6;
+  int i;
+  for (i = 0; i < 8; i++)
+    {
+      int a = x[l3] - x[l4];
+      int b = x[l4] - x[l5];
+      int c = x[l5] - x[l6];
+      int d = (b >= 0 ? b : -b) - (((a >= 0 ? a : -a) + (c >= 0 ? c : -c)) >> 1);
+      if (d < z->s * 2)
+	{
+	  int v = d * (-b > 0 ? 1 : -1);
+	  x[l2] += v >> 3;
+	  x[l7] -= v >> 3;
+	}
+    }
+}
--- gcc/testsuite/gcc.dg/pr46212.c.jj	2010-11-01 12:15:14.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr46212.c	2010-11-01 12:17:55.000000000 +0100
@@ -0,0 +1,23 @@ 
+/* PR rtl-optimization/46212 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops" } */
+/* { dg-options "-O3 -funroll-loops -march=i386" { target { { i686-*-* x86_64-*-* } && ilp32 } } } */
+
+static inline unsigned
+foo (void *x)
+{
+  unsigned y = *(volatile unsigned *) (x);
+  return (y >> 24) | ((y >> 8) & 0xff00) | ((y & 0xff00) << 8) | (y << 24);
+}
+
+void
+bar (void *x, void *y, int z)
+{
+  unsigned c;
+  while (z--)
+    {
+      c = foo (y);
+      *(unsigned *) x = (c & 0xf80000) >> 9 | (c & 0xf800) >> 6
+			| (c & 0xf8) >> 3 | (c & 0x80000000) >> 16;
+    }
+}