patch to fix PR57193

Message ID 767eae56-0126-8751-cfd6-ed9f35a8660d@redhat.com
State New
Headers show
Series
  • patch to fix PR57193
Related show

Commit Message

Vladimir Makarov Feb. 9, 2018, 6:52 p.m.
The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57193

   The patch introduces a new heuristic to change the order of coloring.
Allocnos conflicting with other allocnos preferring hard registers are
colored first when other higher-level heuristics cannot distinguish
between them.  On x86_64 this new heuristic results in practically the
same SPEC2000 performance and code size.

   The patch was successfully bootstrapped and tested on x86-64.

   Committed as rev. 257537.

Patch

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 257536)
+++ ChangeLog	(working copy)
@@ -1,3 +1,15 @@ 
+2018-02-09  Vladimir Makarov  <vmakarov@redhat.com>
+
+	PR rtl-optimization/57193
+	* ira-color.c (struct allocno_color_data): Add member
+	conflict_allocno_hard_prefs.
+	(update_conflict_allocno_hard_prefs): New.
+	(bucket_allocno_compare_func): Add a preference based on
+	conflict_allocno_hard_prefs.
+	(push_allocno_to_stack): Update conflict_allocno_hard_prefs.
+	(color_allocnos): Remove dead code.  Initialize
+	conflict_allocno_hard_prefs.  Call update_conflict_allocno_hard_prefs.
+
 2018-02-09  Jakub Jelinek  <jakub@redhat.com>
 
 	PR target/84226
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 257536)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@ 
+2018-02-09  Vladimir Makarov  <vmakarov@redhat.com>
+
+	PR rtl-optimization/57193
+	* gcc.target/i386/pr57193.c: New.
+
 2018-02-09  Jakub Jelinek  <jakub@redhat.com>
 
 	PR target/84226
Index: ira-color.c
===================================================================
--- ira-color.c	(revision 257157)
+++ ira-color.c	(working copy)
@@ -112,6 +112,9 @@  struct allocno_color_data
      available for the allocno allocation.  It is number of the
      profitable hard regs.  */
   int available_regs_num;
+  /* Sum of frequencies of hard register preferences of all
+     conflicting allocnos which are not on the coloring stack yet.  */
+  int conflict_allocno_hard_prefs;
   /* Allocnos in a bucket (used in coloring) chained by the following
      two members.  */
   ira_allocno_t next_bucket_allocno;
@@ -1435,6 +1438,36 @@  update_costs_from_copies (ira_allocno_t
   update_costs_from_allocno (allocno, hard_regno, 1, decr_p, record_p);
 }
 
+/* Update conflict_allocno_hard_prefs of allocnos conflicting with
+   ALLOCNO.  */
+static void
+update_conflict_allocno_hard_prefs (ira_allocno_t allocno)
+{
+  int l, nr = ALLOCNO_NUM_OBJECTS (allocno);
+  
+  for (l = 0; l < nr; l++)
+    {
+      ira_object_t conflict_obj, obj = ALLOCNO_OBJECT (allocno, l);
+      ira_object_conflict_iterator oci;
+      
+      FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci)
+	{
+	  ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj);
+	  allocno_color_data_t conflict_data = ALLOCNO_COLOR_DATA (conflict_a);
+	  ira_pref_t pref;
+
+	  if (!(hard_reg_set_intersect_p
+		(ALLOCNO_COLOR_DATA (allocno)->profitable_hard_regs,
+		 conflict_data->profitable_hard_regs)))
+	    continue;
+	  for (pref = ALLOCNO_PREFS (allocno);
+	       pref != NULL;
+	       pref = pref->next_pref)
+	    conflict_data->conflict_allocno_hard_prefs += pref->freq;
+	}
+    }
+}
+
 /* Restore costs of allocnos connected to ALLOCNO by copies as it was
    before updating costs of these allocnos from given allocno.  This
    is a wise thing to do as if given allocno did not get an expected
@@ -2223,7 +2256,7 @@  bucket_allocno_compare_func (const void
 {
   ira_allocno_t a1 = *(const ira_allocno_t *) v1p;
   ira_allocno_t a2 = *(const ira_allocno_t *) v2p;
-  int diff, freq1, freq2, a1_num, a2_num;
+  int diff, freq1, freq2, a1_num, a2_num, pref1, pref2;
   ira_allocno_t t1 = ALLOCNO_COLOR_DATA (a1)->first_thread_allocno;
   ira_allocno_t t2 = ALLOCNO_COLOR_DATA (a2)->first_thread_allocno;
   int cl1 = ALLOCNO_CLASS (a1), cl2 = ALLOCNO_CLASS (a2);
@@ -2253,6 +2286,11 @@  bucket_allocno_compare_func (const void
   a2_num = ALLOCNO_COLOR_DATA (a2)->available_regs_num;
   if ((diff = a2_num - a1_num) != 0)
     return diff;
+  /* Push allocnos with minimal conflict_allocno_hard_prefs first.  */
+  pref1 = ALLOCNO_COLOR_DATA (a1)->conflict_allocno_hard_prefs;
+  pref2 = ALLOCNO_COLOR_DATA (a2)->conflict_allocno_hard_prefs;
+  if ((diff = pref1 - pref2) != 0)
+    return diff;
   return ALLOCNO_NUM (a2) - ALLOCNO_NUM (a1);
 }
 
@@ -2339,7 +2377,8 @@  delete_allocno_from_bucket (ira_allocno_
 /* Put allocno A onto the coloring stack without removing it from its
    bucket.  Pushing allocno to the coloring stack can result in moving
    conflicting allocnos from the uncolorable bucket to the colorable
-   one.  */
+   one.  Update conflict_allocno_hard_prefs of the conflicting
+   allocnos which are not on the stack yet.  */
 static void
 push_allocno_to_stack (ira_allocno_t a)
 {
@@ -2369,15 +2408,19 @@  push_allocno_to_stack (ira_allocno_t a)
       FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci)
 	{
 	  ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj);
-	  
+	  ira_pref_t pref;
+
 	  conflict_data = ALLOCNO_COLOR_DATA (conflict_a);
-	  if (conflict_data->colorable_p
-	      || ! conflict_data->in_graph_p
+	  if (! conflict_data->in_graph_p
 	      || ALLOCNO_ASSIGNED_P (conflict_a)
 	      || !(hard_reg_set_intersect_p
 		   (ALLOCNO_COLOR_DATA (a)->profitable_hard_regs,
 		    conflict_data->profitable_hard_regs)))
 	    continue;
+	  for (pref = ALLOCNO_PREFS (a); pref != NULL; pref = pref->next_pref)
+	    conflict_data->conflict_allocno_hard_prefs -= pref->freq;
+	  if (conflict_data->colorable_p)
+	    continue;
 	  ira_assert (bitmap_bit_p (coloring_allocno_bitmap,
 				    ALLOCNO_NUM (conflict_a)));
 	  if (update_left_conflict_sizes_p (conflict_a, a, size))
@@ -3048,21 +3091,12 @@  color_allocnos (void)
   setup_profitable_hard_regs ();
   EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, i, bi)
     {
-      int l, nr;
-      HARD_REG_SET conflict_hard_regs;
       allocno_color_data_t data;
       ira_pref_t pref, next_pref;
 
       a = ira_allocnos[i];
-      nr = ALLOCNO_NUM_OBJECTS (a);
-      CLEAR_HARD_REG_SET (conflict_hard_regs);
-      for (l = 0; l < nr; l++)
-	{
-	  ira_object_t obj = ALLOCNO_OBJECT (a, l);
-	  IOR_HARD_REG_SET (conflict_hard_regs,
-			    OBJECT_CONFLICT_HARD_REGS (obj));
-	}
       data = ALLOCNO_COLOR_DATA (a);
+      data->conflict_allocno_hard_prefs = 0;
       for (pref = ALLOCNO_PREFS (a); pref != NULL; pref = next_pref)
 	{
 	  next_pref = pref->next_pref;
@@ -3072,6 +3106,7 @@  color_allocnos (void)
 	    ira_remove_pref (pref);
 	}
     }
+  
   if (flag_ira_algorithm == IRA_ALGORITHM_PRIORITY)
     {
       n = 0;
@@ -3134,6 +3169,7 @@  color_allocnos (void)
 	    {
 	      ALLOCNO_COLOR_DATA (a)->in_graph_p = true;
 	      update_costs_from_prefs (a);
+	      update_conflict_allocno_hard_prefs (a);
 	    }
 	  else
 	    {
Index: testsuite/gcc.target/i386/pr57193.c
===================================================================
--- testsuite/gcc.target/i386/pr57193.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr57193.c	(working copy)
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "movdqa" 2 } } */
+
+#include <emmintrin.h>
+
+void test1(const __m128i* in1, const __m128i* in2, __m128i* out,
+           __m128i f, __m128i zero)
+{
+	__m128i c = _mm_avg_epu8(*in1, *in2);
+	__m128i l = _mm_unpacklo_epi8(c, zero);
+	__m128i h = _mm_unpackhi_epi8(c, zero);
+	__m128i m = _mm_mulhi_epu16(l, f);
+	__m128i n = _mm_mulhi_epu16(h, f);
+	*out = _mm_packus_epi16(m, n);
+}