Patchwork Fix twolf -funroll-loops -O3 miscompilation (a semi-latent web.c bug)

login
register
mail settings
Submitter Steven Bosscher
Date Nov. 25, 2012, 8:44 p.m.
Message ID <CABu31nPtzhKLpxnZHyVinrsn=sQ41eB1Vu4D1EQPJK1m-7CwZg@mail.gmail.com>
Download mbox | patch
Permalink /patch/201569/
State New
Headers show

Comments

Steven Bosscher - Nov. 25, 2012, 8:44 p.m.
On Sat, Nov 24, 2012 at 2:09 AM, Steven Bosscher wrote:
> On Fri, Nov 23, 2012 at 11:45 PM, Steven Bosscher wrote:
>> Removing the note is easier but it may hurt optimization later on:
>> CSE2 puts the note back in but this introduces a pass ordering
>> dependency. Perhaps that's not a big problem, but I'm not sure...
>
> Hmm, actually CSE2 fails to put the note back, I guess because it's a
> local CSE pass only...

Here's another possible fix: Just punt on all REG_EQUAL notes for the
IV. It's a bit of a big hammer, but there are no code changes in my
set of cc1-i files (compiled with -funroll-loops) on x86_64 and
powerpc64 so apparently it's not all that important to keep the notes.

Comments on this one?

Ciao!
Steven

gcc/
	* rtlanal.c (remove_reg_equal_equiv_notes_for_regno): Fix for
	deferred re-scanning.
	* loop-unroll.c (struct iv_to_split): Add new 'orig_var' member.
	(analyze_iv_to_split_insn): Record it.
	(analyze_insns_in_loop): Remove all REG_EQUAL notes for IVs that
	will be split.
	(apply_opt_in_copies): Use FOR_BB_INSNS_SAFE.

testsuite/
	* gcc.dg/pr55006.c: New test.

Patch

Index: rtlanal.c
===================================================================
--- rtlanal.c	(revision 193394)
+++ rtlanal.c	(working copy)
@@ -2015,15 +2015,18 @@  remove_reg_equal_equiv_notes (rtx insn)
 void
 remove_reg_equal_equiv_notes_for_regno (unsigned int regno)
 {
-  df_ref eq_use;
+  df_ref eq_use, next_eq_use;

   if (!df)
     return;

   /* This loop is a little tricky.  We cannot just go down the chain because
-     it is being modified by some actions in the loop.  So we just iterate
-     over the head.  We plan to drain the list anyway.  */
-  while ((eq_use = DF_REG_EQ_USE_CHAIN (regno)) != NULL)
+     it may be modified by some actions in the loop if re-scanning is not
+     deferred.  We also can't just iterate over the head because the chain
+     may be *not* modified if re-scanning *is* deferred.  */
+  for (eq_use = DF_REG_EQ_USE_CHAIN (regno);
+       eq_use != NULL;
+       eq_use = next_eq_use)
     {
       rtx insn = DF_REF_INSN (eq_use);
       rtx note = find_reg_equal_equiv_note (insn);
@@ -2033,6 +2036,8 @@  remove_reg_equal_equiv_notes_for_regno (
 	 remove_note.  */
       gcc_assert (note);

+      next_eq_use = DF_REF_NEXT_REG (eq_use);
+
       remove_note (insn, note);
     }
 }
Index: loop-unroll.c
===================================================================
--- loop-unroll.c	(revision 193394)
+++ loop-unroll.c	(working copy)
@@ -74,6 +74,7 @@  along with GCC; see the file COPYING3.
 struct iv_to_split
 {
   rtx insn;		/* The insn in that the induction variable occurs.  */
+  rtx orig_var;		/* The variable (register) for the IV before split.  */
   rtx base_var;		/* The variable on that the values in the further
 			   iterations are based.  */
   rtx step;		/* Step of the induction variable.  */
@@ -1833,6 +1834,7 @@  analyze_iv_to_split_insn (rtx insn)
   /* Record the insn to split.  */
   ivts = XNEW (struct iv_to_split);
   ivts->insn = insn;
+  ivts->orig_var = dest;
   ivts->base_var = NULL_RTX;
   ivts->step = iv.step;
   ivts->next = NULL;
@@ -1913,6 +1915,12 @@  analyze_insns_in_loop (struct loop *loop

         if (ivts)
           {
+	    /* Updating REG_EQUAL notes for this IV is tricky: We cannot tell
+	       until after unrolling whether an EQ_USE is reached by the split
+	       IV while the IV reg is still live.  See PR55006.
+	       Be conservative, remove all such notes.  */
+	    remove_reg_equal_equiv_notes_for_regno (REGNO (ivts->orig_var));
+
             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
 	    gcc_assert (*slot1 == NULL);
             *slot1 = ivts;
@@ -2293,9 +2301,8 @@  apply_opt_in_copies (struct opt_info *op
 					unrolling);
       bb->aux = 0;
       orig_insn = BB_HEAD (orig_bb);
-      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
+      FOR_BB_INSNS_SAFE (bb, insn, next)
         {
-          next = NEXT_INSN (insn);
 	  if (!INSN_P (insn)
 	      || (DEBUG_INSN_P (insn)
 		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
Index: testsuite/gcc.dg/pr55006.c
===================================================================
--- testsuite/gcc.dg/pr55006.c	(revision 0)
+++ testsuite/gcc.dg/pr55006.c	(revision 0)
@@ -0,0 +1,88 @@ 
+/* PR rtl-optimization/55006 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fomit-frame-pointer -funroll-loops" } */
+
+extern void abort (void) __attribute__ ((__noreturn__));
+extern void *memcpy (void *__restrict, const void *__restrict, __SIZE_TYPE__);
+
+static void __attribute__ ((__noinline__, __noclone__)) ga4076 (void)
+{
+  int D833;
+  float D834;
+  int D837;
+  int D840;
+  float D841;
+  int D844;
+  float dda[100];
+  int ids;
+  float limit3;
+  int offset2;
+  int pos1;
+  int S4;
+
+  static float A0[100] =
+  { 10e+0, 20e+0, 30e+0, 40e+0, 50e+0, 60e+0, 70e+0, 80e+0, 90e+0,
+    10e+1, 11e+1, 12e+1, 13e+1, 14e+1, 15e+1, 16e+1, 17e+1, 18e+1,
+    19e+1, 20e+1, 21e+1, 22e+1, 23e+1, 24e+1, 25e+1, 26e+1, 27e+1,
+    28e+1, 29e+1, 30e+1, 31e+1, 32e+1, 33e+1, 34e+1, 35e+1, 36e+1,
+    37e+1, 38e+1, 39e+1, 40e+1, 41e+1, 42e+1, 43e+1, 44e+1, 45e+1,
+    46e+1, 47e+1, 48e+1, 49e+1, 50e+1, 51e+1, 52e+1, 53e+1, 54e+1,
+    55e+1, 56e+1, 57e+1, 58e+1, 59e+1, 60e+1, 61e+1, 62e+1, 63e+1,
+    64e+1, 65e+1, 66e+1, 67e+1, 68e+1, 69e+1, 70e+1, 71e+1, 72e+1,
+    73e+1, 74e+1, 75e+1, 76e+1, 77e+1, 78e+1, 79e+1, 80e+1, 81e+1,
+    82e+1, 83e+1, 84e+1, 85e+1, 86e+1, 87e+1, 88e+1, 89e+1, 90e+1,
+    91e+1, 92e+1, 93e+1, 94e+1, 95e+1, 96e+1, 97e+1, 98e+1, 99e+1,
+    10e+2
+  };
+
+  memcpy (dda, A0, sizeof (dda));
+
+  limit3 = -1. * __builtin_inf();
+  pos1 = 0;
+  offset2 = 0;
+  S4 = 1;
+
+D831:
+  if (S4 > 100) goto L3; else goto D832;
+D832:
+  D833 = S4 - 1;
+  D834 = dda[D833];
+  if (D834 >= limit3) goto D835; else goto D836;
+D835:
+  D837 = S4 - 1;
+  limit3 = dda[D837];
+  pos1 = S4 + offset2;
+  goto L1;
+D836:
+  S4 = S4 + 1;
+  goto D831;
+L3:
+  pos1 = 1;
+  goto L2;
+L1:
+  if (S4 > 100) goto L2; else goto D839;
+D839:
+  D840 = S4 - 1;
+  D841 = dda[D840];
+  if (D841 > limit3) goto D842; else goto D843;
+D842:
+  D844 = S4 - 1;
+  limit3 = dda[D844];
+  pos1 = S4 + offset2;
+D843:
+  S4 = S4 + 1;
+  goto L1;
+L2:
+  ids = pos1;
+  if (ids != 100)
+    abort ();
+}
+
+
+int
+main (void)
+{
+  ga4076 ();
+  return 0;
+}
+