Patchwork patch to fix PR 60650

login
register
mail settings
Submitter Vladimir Makarov
Date March 27, 2014, 6:51 p.m.
Message ID <5334733A.2060708@redhat.com>
Download mbox | patch
Permalink /patch/334453/
State New
Headers show

Comments

Vladimir Makarov - March 27, 2014, 6:51 p.m.
The following patch fixes

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60650

  The reason was in general regs pool fragmentation which resulted in
failure to assign double regs to 3 conflicting reload pseudos.  The
fragmentation started in IRA and the chain unfortunate events further in
LRA resulted in LRA crash.

  The patch fixes this problem by more radical than previous approach. 
In case we can not assign a hard reg on the 1st pass (which is very rare
event as we can spill *non-reload* pseudos), we spill all conflicting
pseudos and assign always 1st available hard reg which permits to avoid
the fragmentation.

  The patch was successfully bootstrapped and tested on x86-64 and arm.

  Committed as rev.208876.

2014-03-27  Vladimir Makarov  <vmakarov@redhat.com>

        PR rtl-optimization/60650
        * lra-assign.c (find_hard_regno_for, spill_for): Add parameter
        first_p.  Use it.
        (find_spills_for): New.
        (assign_by_spills): Pass the new parameter to find_hard_regno_for.
        Spill all pseudos on the second iteration.

2014-03-27  Vladimir Makarov  <vmakarov@redhat.com>

        PR rtl-optimization/60650
        * gcc.target/arm/pr60650.c: New.
Jakub Jelinek - March 29, 2014, 11:24 a.m.
On Thu, Mar 27, 2014 at 02:51:38PM -0400, Vladimir Makarov wrote:
> 2014-03-27  Vladimir Makarov  <vmakarov@redhat.com>
> 
>         PR rtl-optimization/60650
>         * gcc.target/arm/pr60650.c: New.

The test fails for me with:
spawn -ignore SIGHUP /builddir/build/BUILD/gcc-4.9.0-20140328/obj-armv7hl-redhat-linux-gnueabi/gcc/xgcc -B/builddir/build/BUILD/gcc-4.9.0-20140328/obj-armv7hl-redhat-linux-gnueabi/gcc/builddir/build/BUILD/gcc-4.9.0-20140328/gcc/testsuite/gcc.target/arm/pr60650.c -fno-diagnostics-show-caret -fdiagnostics-color=never -O2 -fno-omit-frame-pointer -mabi=apcs-gnu -march=armv7-a -S -o pr60650.s
/builddir/build/BUILD/gcc-4.9.0-20140328/gcc/testsuite/gcc.target/arm/pr60650.c:1:0: sorry, unimplemented: -mfloat-abi=hard and VFP
compiler exited with status 1
Does one have to pass -mfloat-abi= option too unconditionally?
Can somebody from the ARM/Linaro people handle this?

Also:

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr60650.c
...
> +      asm ("1\t.long ");

This asm looks quite bogus, and with
	asm ("");
it ICEs the same way, can it be changed?

> +      __builtin_unreachable ();
> +    }
> +}

	Jakub

Patch

index 28d4a0f..cba8b8f 100644
--- a/gcc/lra-assigns.c
+++ b/gcc/lra-assigns.c
@@ -451,10 +451,16 @@  adjust_hard_regno_cost (int hard_regno, int incr)
    that register.  (If several registers have equal cost, the one with
    the highest priority wins.)  Return -1 on failure.
 
+   If FIRST_P, return the first available hard reg ignoring other
+   criteria, e.g. allocation cost.  This approach results in less hard
+   reg pool fragmentation and permit to allocate hard regs to reload
+   pseudos in complicated situations where pseudo sizes are different.
+
    If TRY_ONLY_HARD_REGNO >= 0, consider only that hard register,
    otherwise consider all hard registers in REGNO's class.  */
 static int
-find_hard_regno_for (int regno, int *cost, int try_only_hard_regno)
+find_hard_regno_for (int regno, int *cost, int try_only_hard_regno,
+		     bool first_p)
 {
   HARD_REG_SET conflict_set;
   int best_cost = INT_MAX, best_priority = INT_MIN, best_usage = INT_MAX;
@@ -630,7 +636,7 @@  find_hard_regno_for (int regno, int *cost, int try_only_hard_regno)
 	      best_usage = lra_hard_reg_usage[hard_regno];
 	    }
 	}
-      if (try_only_hard_regno >= 0)
+      if (try_only_hard_regno >= 0 || (first_p && best_hard_regno >= 0))
 	break;
     }
   if (best_hard_regno >= 0)
@@ -816,9 +822,15 @@  static int *sorted_reload_pseudos;
    to be spilled), we take into account not only how REGNO will
    benefit from the spills but also how other reload pseudos not yet
    assigned to hard registers benefit from the spills too.  In very
-   rare cases, the function can fail and return -1.  */
+   rare cases, the function can fail and return -1.
+
+   If FIRST_P, return the first available hard reg ignoring other
+   criteria, e.g. allocation cost and cost of spilling non-reload
+   pseudos.  This approach results in less hard reg pool fragmentation
+   and permit to allocate hard regs to reload pseudos in complicated
+   situations where pseudo sizes are different.  */
 static int
-spill_for (int regno, bitmap spilled_pseudo_bitmap)
+spill_for (int regno, bitmap spilled_pseudo_bitmap, bool first_p)
 {
   int i, j, n, p, hard_regno, best_hard_regno, cost, best_cost, rclass_size;
   int reload_hard_regno, reload_cost;
@@ -905,7 +917,7 @@  spill_for (int regno, bitmap spilled_pseudo_bitmap)
 	      && (ira_reg_classes_intersect_p
 		  [rclass][regno_allocno_class_array[reload_regno]])
 	      && live_pseudos_reg_renumber[reload_regno] < 0
-	      && find_hard_regno_for (reload_regno, &cost, -1) < 0)
+	      && find_hard_regno_for (reload_regno, &cost, -1, first_p) < 0)
 	    sorted_reload_pseudos[n++] = reload_regno;
       EXECUTE_IF_SET_IN_BITMAP (&spill_pseudos_bitmap, 0, spill_regno, bi)
 	{
@@ -914,7 +926,7 @@  spill_for (int regno, bitmap spilled_pseudo_bitmap)
 	    fprintf (lra_dump_file, " spill %d(freq=%d)",
 		     spill_regno, lra_reg_info[spill_regno].freq);
 	}
-      hard_regno = find_hard_regno_for (regno, &cost, -1);
+      hard_regno = find_hard_regno_for (regno, &cost, -1, first_p);
       if (hard_regno >= 0)
 	{
 	  assign_temporarily (regno, hard_regno);
@@ -926,7 +938,7 @@  spill_for (int regno, bitmap spilled_pseudo_bitmap)
 	      lra_assert (live_pseudos_reg_renumber[reload_regno] < 0);
 	      if ((reload_hard_regno
 		   = find_hard_regno_for (reload_regno,
-					  &reload_cost, -1)) >= 0)
+					  &reload_cost, -1, first_p)) >= 0)
 		{
 		  if (lra_dump_file != NULL)
 		    fprintf (lra_dump_file, " assign %d(cost=%d)",
@@ -1148,8 +1160,8 @@  improve_inheritance (bitmap changed_pseudos)
 		   regno, hard_regno, another_regno, another_hard_regno);
 	      update_lives (another_regno, true);
 	      lra_setup_reg_renumber (another_regno, -1, false);
-	      if (hard_regno
-		  == find_hard_regno_for (another_regno, &cost, hard_regno))
+	      if (hard_regno == find_hard_regno_for (another_regno, &cost,
+						     hard_regno, false))
 		assign_hard_regno (hard_regno, another_regno);
 	      else
 		assign_hard_regno (another_hard_regno, another_regno);
@@ -1166,15 +1178,50 @@  static bitmap_head all_spilled_pseudos;
 /* All pseudos whose allocation was changed.  */
 static bitmap_head changed_pseudo_bitmap;
 
+
+/* Add to LIVE_RANGE_HARD_REG_PSEUDOS all pseudos conflicting with
+   REGNO and whose hard regs can be assigned to REGNO.  */
+static void
+find_all_spills_for (int regno)
+{
+  int p;
+  lra_live_range_t r;
+  unsigned int k;
+  bitmap_iterator bi;
+  enum reg_class rclass;
+  bool *rclass_intersect_p;
+
+  rclass = regno_allocno_class_array[regno];
+  rclass_intersect_p = ira_reg_classes_intersect_p[rclass];
+  for (r = lra_reg_info[regno].live_ranges; r != NULL; r = r->next)
+    {
+      EXECUTE_IF_SET_IN_BITMAP (&live_hard_reg_pseudos[r->start], 0, k, bi)
+	if (rclass_intersect_p[regno_allocno_class_array[k]])
+	  sparseset_set_bit (live_range_hard_reg_pseudos, k);
+      for (p = r->start + 1; p <= r->finish; p++)
+	{
+	  lra_live_range_t r2;
+
+	  for (r2 = start_point_ranges[p];
+	       r2 != NULL;
+	       r2 = r2->start_next)
+	    {
+	      if (live_pseudos_reg_renumber[r2->regno] >= 0
+		  && rclass_intersect_p[regno_allocno_class_array[r2->regno]])
+		sparseset_set_bit (live_range_hard_reg_pseudos, r2->regno);
+	    }
+	}
+    }
+}
+
 /* Assign hard registers to reload pseudos and other pseudos.  */
 static void
 assign_by_spills (void)
 {
   int i, n, nfails, iter, regno, hard_regno, cost, restore_regno;
   rtx insn;
-  basic_block bb;
   bitmap_head changed_insns, do_not_assign_nonreload_pseudos;
-  unsigned int u;
+  unsigned int u, conflict_regno;
   bitmap_iterator bi;
   bool reload_p;
   int max_regno = max_reg_num ();
@@ -1211,10 +1258,10 @@  assign_by_spills (void)
 		     ORIGINAL_REGNO (regno_reg_rtx[regno]),
 		     lra_reg_info[regno].freq, regno_assign_info[regno].first,
 		     regno_assign_info[regno_assign_info[regno].first].freq);
-	  hard_regno = find_hard_regno_for (regno, &cost, -1);
+	  hard_regno = find_hard_regno_for (regno, &cost, -1, iter == 1);
 	  reload_p = ! bitmap_bit_p (&non_reload_pseudos, regno);
 	  if (hard_regno < 0 && reload_p)
-	    hard_regno = spill_for (regno, &all_spilled_pseudos);
+	    hard_regno = spill_for (regno, &all_spilled_pseudos, iter == 1);
 	  if (hard_regno < 0)
 	    {
 	      if (reload_p)
@@ -1286,61 +1333,41 @@  assign_by_spills (void)
 	  lra_assert (asm_p);
 	  break;
 	}
-      /* This is a very rare event.  We can not assign a hard
-	 register to reload pseudo because the hard register was
-	 assigned to another reload pseudo on a previous
-	 assignment pass.  For x86 example, on the 1st pass we
-	 assigned CX (although another hard register could be used
-	 for this) to reload pseudo in an insn, on the 2nd pass we
-	 need CX (and only this) hard register for a new reload
-	 pseudo in the same insn.  */
+      /* This is a very rare event.  We can not assign a hard register
+	 to reload pseudo because the hard register was assigned to
+	 another reload pseudo on a previous assignment pass.  For x86
+	 example, on the 1st pass we assigned CX (although another
+	 hard register could be used for this) to reload pseudo in an
+	 insn, on the 2nd pass we need CX (and only this) hard
+	 register for a new reload pseudo in the same insn.  Another
+	 possible situation may occur in assigning to multi-regs
+	 reload pseudos when hard regs pool is too fragmented even
+	 after spilling non-reload pseudos.
+
+	 We should do something radical here to succeed.  Here we
+	 spill *all* conflicting pseudos and reassign them.  */
       if (lra_dump_file != NULL)
 	fprintf (lra_dump_file, "  2nd iter for reload pseudo assignments:\n");
+      sparseset_clear (live_range_hard_reg_pseudos);
       for (i = 0; i < nfails; i++)
 	{
 	  if (lra_dump_file != NULL)
 	    fprintf (lra_dump_file, "	 Reload r%d assignment failure\n",
 		     sorted_pseudos[i]);
-	  bitmap_ior_into (&changed_insns,
-			   &lra_reg_info[sorted_pseudos[i]].insn_bitmap);
+	  find_all_spills_for (sorted_pseudos[i]);
+	}
+      EXECUTE_IF_SET_IN_SPARSESET (live_range_hard_reg_pseudos, conflict_regno)
+	{
+	  if ((int) conflict_regno >= lra_constraint_new_regno_start)
+	    sorted_pseudos[nfails++] = conflict_regno;
+	  if (lra_dump_file != NULL)
+	    fprintf (lra_dump_file, "	  Spill %s r%d(hr=%d, freq=%d)\n",
+		     pseudo_prefix_title (conflict_regno), conflict_regno,
+		     reg_renumber[conflict_regno],
+		     lra_reg_info[conflict_regno].freq);
+	  update_lives (conflict_regno, true);
+	  lra_setup_reg_renumber (conflict_regno, -1, false);
 	}
-
-      /* FIXME: Look up the changed insns in the cached LRA insn data using
-	 an EXECUTE_IF_SET_IN_BITMAP over changed_insns.  */
-      FOR_EACH_BB_FN (bb, cfun)
-	FOR_BB_INSNS (bb, insn)
-	if (bitmap_bit_p (&changed_insns, INSN_UID (insn)))
-	  {
-	    lra_insn_recog_data_t data;
-	    struct lra_insn_reg *r;
-
-	    data = lra_get_insn_recog_data (insn);
-	    for (r = data->regs; r != NULL; r = r->next)
-	      {
-		regno = r->regno;
-		/* A reload pseudo did not get a hard register on the
-		   first iteration because of the conflict with
-		   another reload pseudos in the same insn.  So we
-		   consider only reload pseudos assigned to hard
-		   registers.  We shall exclude inheritance pseudos as
-		   they can occur in original insns (not reload ones).
-		   We can omit the check for split pseudos because
-		   they occur only in move insns containing non-reload
-		   pseudos.  */
-		if (regno < lra_constraint_new_regno_start
-		    || bitmap_bit_p (&lra_inheritance_pseudos, regno)
-		    || reg_renumber[regno] < 0)
-		  continue;
-		sorted_pseudos[nfails++] = regno;
-		if (lra_dump_file != NULL)
-		  fprintf (lra_dump_file,
-			   "	  Spill reload r%d(hr=%d, freq=%d)\n",
-			   regno, reg_renumber[regno],
-			   lra_reg_info[regno].freq);
-		update_lives (regno, true);
-		lra_setup_reg_renumber (regno, -1, false);
-	      }
-	  }
       n = nfails;
     }
   improve_inheritance (&changed_pseudo_bitmap);
@@ -1382,7 +1409,7 @@  assign_by_spills (void)
       for (i = 0; i < n; i++)
 	{
 	  regno = sorted_pseudos[i];
-	  hard_regno = find_hard_regno_for (regno, &cost, -1);
+	  hard_regno = find_hard_regno_for (regno, &cost, -1, false);
 	  if (hard_regno >= 0)
 	    {
 	      assign_hard_regno (hard_regno, regno);
diff --git a/gcc/testsuite/gcc.target/arm/pr60650.c b/gcc/testsuite/gcc.target/arm/pr60650.c
new file mode 100644
index 0000000..bfa7f1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr60650.c
@@ -0,0 +1,39 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -mabi=apcs-gnu -march=armv7-a" } */
+
+struct super_block
+{
+  int s_blocksize_bits;
+};
+struct btrfs_fs_info
+{
+  struct super_block *sb;
+};
+struct btrfs_root
+{
+  struct btrfs_fs_info *fs_info;
+} *b;
+
+
+int a, c, d;
+long long e;
+
+truncate_one_csum (struct btrfs_root *p1, long long p2, long long p3)
+{
+  int f, g, i = p1->fs_info->sb->s_blocksize_bits;
+  g = a;
+  long long h = p2 + p3;
+  f = foo1 (b, 0, c, 0);
+  e = f / g;
+  e <<= p1->fs_info->sb->s_blocksize_bits;
+  if (d < p2)
+    {
+      int j = e - h >> i;
+      foo2 (p1, 0, j);
+    }
+  else
+    {
+      asm ("1\t.long ");
+      __builtin_unreachable ();
+    }
+}