Patchwork Fix combiner with added_sets_[12] (PR rtl-optimization/52060)

login
register
mail settings
Submitter Jakub Jelinek
Date Feb. 6, 2012, 8:43 p.m.
Message ID <20120206204319.GT18768@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/139791/
State New
Headers show

Comments

Jakub Jelinek - Feb. 6, 2012, 8:43 p.m.
Hi!

combine_simplify_rtx and its helpers (called from subst) apparently modify
the given RTL in place; there are many SUBST () calls all around those
functions.  On the attached testcase, in particular on arm, when
newpat = subst (newpat, i1dest, i1src, 0, 0, 0); is called, force_to_mode
modifies the if_then_else operands through SUBST, which modifies i1src.
When I2's dest is needed later on (i.e. added_sets_2), we subst this
unintentionally clobbered i1src into i2pat, which is incorrect, as that
transformation was only valid as part of the i3 pattern.

Fixed by making a copy of i1src and i0src before we pass them as the "to"
argument to subst, if we'll need them later on for added_sets_[12].

Bootstrapped/regtested on x86_64-linux and i686-linux, tested with cross
to arm, ok for trunk?

2012-02-06  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/52060
	* combine.c (try_combine): Add i0src_copy and i0src_copy2 variables,
	copy i1src to i1src_copy whenever added_sets_2 && i1_feeds_i2_n already
	before i1dest -> i1src substitution in newpat, copy i0src to i0src_copy
	and/or i0src_copy2 when needed.

	* gcc.dg/torture/pr52060.c: New test.


	Jakub
Eric Botcazou - Feb. 7, 2012, 3:29 p.m.
> 2012-02-06  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR rtl-optimization/52060
> 	* combine.c (try_combine): Add i0src_copy and i0src_copy2 variables,
> 	copy i1src to i1src_copy whenever added_sets_2 && i1_feeds_i2_n already
> 	before i1dest -> i1src substitution in newpat, copy i0src to i0src_copy
> 	and/or i0src_copy2 when needed.
>
> 	* gcc.dg/torture/pr52060.c: New test.

OK, thanks.

Patch

--- gcc/combine.c.jj	2012-02-03 13:31:41.000000000 +0100
+++ gcc/combine.c	2012-02-06 17:49:40.000000000 +0100
@@ -2591,8 +2591,8 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
   rtx i3dest_killed = 0;
   /* SET_DEST and SET_SRC of I2, I1 and I0.  */
   rtx i2dest = 0, i2src = 0, i1dest = 0, i1src = 0, i0dest = 0, i0src = 0;
-  /* Copy of SET_SRC of I1, if needed.  */
-  rtx i1src_copy = 0;
+  /* Copy of SET_SRC of I1 and I0, if needed.  */
+  rtx i1src_copy = 0, i0src_copy = 0, i0src_copy2 = 0;
   /* Set if I2DEST was reused as a scratch register.  */
   bool i2scratch = false;
   /* The PATTERNs of I0, I1, and I2, or a copy of them in certain cases.  */
@@ -3246,6 +3246,11 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i1);
 
+      /* If the following substitution will modify I1SRC, make a copy of it
+	 for the case where it is substituted for I1DEST in I2PAT later.  */
+      if (added_sets_2 && i1_feeds_i2_n)
+	i1src_copy = copy_rtx (i1src);
+
       /* If I0 feeds into I1 and I0DEST is in I0SRC, we need to make a unique
 	 copy of I1SRC each time we substitute it, in order to avoid creating
 	 self-referential RTL when we will be substituting I0SRC for I0DEST
@@ -3273,10 +3278,14 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
 	  return 0;
 	}
 
-      /* If the following substitution will modify I1SRC, make a copy of it
-	 for the case where it is substituted for I1DEST in I2PAT later.  */
-      if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
-	i1src_copy = copy_rtx (i1src);
+      /* If the following substitution will modify I0SRC, make a copy of it
+	 for the case where it is substituted for I0DEST in I1PAT later.  */
+      if (added_sets_1 && i0_feeds_i1_n)
+	i0src_copy = copy_rtx (i0src);
+      /* And a copy for I0DEST in I2PAT substitution.  */
+      if (added_sets_2 && ((i0_feeds_i1_n && i1_feeds_i2_n)
+			   || (i0_feeds_i2_n)))
+	i0src_copy2 = copy_rtx (i0src);
 
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i0);
@@ -3342,7 +3351,7 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
 	{
 	  rtx t = i1pat;
 	  if (i0_feeds_i1_n)
-	    t = subst (t, i0dest, i0src, 0, 0, 0);
+	    t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0, 0);
 
 	  XVECEXP (newpat, 0, --total_sets) = t;
 	}
@@ -3353,7 +3362,7 @@  try_combine (rtx i3, rtx i2, rtx i1, rtx
 	    t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0,
 		       i0_feeds_i1_n && i0dest_in_i0src);
 	  if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
-	    t = subst (t, i0dest, i0src, 0, 0, 0);
+	    t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0, 0);
 
 	  XVECEXP (newpat, 0, --total_sets) = t;
 	}
--- gcc/testsuite/gcc.dg/torture/pr52060.c.jj	2012-02-06 18:35:51.215511107 +0100
+++ gcc/testsuite/gcc.dg/torture/pr52060.c	2012-02-06 18:34:18.000000000 +0100
@@ -0,0 +1,57 @@ 
+/* PR rtl-optimization/52060 */
+/* { dg-do run { target int32plus } } */
+
+extern void abort (void);
+union U { float f; unsigned int i; };
+
+static inline __attribute__((always_inline)) unsigned int
+foo (float x)
+{
+  union U u;
+  unsigned int a, b, c;
+  int d;
+  int e;
+  u.f = x;
+  d = ((unsigned) u.i >> 23) & 0xFF;
+  c = d < 126 ? 0 : ~0;
+  e = 127 + 30 - d;
+  a = (u.i << 8) | 0x80000000U;
+  b = a & ((1 << e) - 1);
+  a = a >> e;
+  c &= (b | (a & 2)) ? ~0 : ~1;
+  a = ((a + 1U) >> 1) & c;
+  return a;
+}
+
+__attribute__((noinline)) unsigned int
+bar (float x)
+{
+  unsigned int a, b, c;
+  static const unsigned int d[128] =
+  {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7
+  };
+  a = foo (1048575.0f * x);
+  c = d[a >> 13];
+  b = (c << 13) | ((a >> (7 - c)) & 0x1fff);
+  return b;
+}
+
+int
+main ()
+{
+  union U u;
+  u.f = 1048575.0f;
+  if (sizeof (u.i) == sizeof (u.f)
+      && u.i == 0x497ffff0U
+      && bar (1.0f) != 65535)
+    abort ();
+  return 0;
+}