diff mbox series

patch to fix PR80481

Message ID 42f636f5-ef34-1e0b-a920-b73252435629@redhat.com
State New
Headers show
Series patch to fix PR80481 | expand

Commit Message

Vladimir Makarov Jan. 12, 2018, 5:11 p.m. UTC
The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80481

    While forming an allocation thread in a multi-region function, a
conflicting allocno was added to the thread, which resulted in the
generation of additional moves.  The patch prevents the inclusion of
conflicting allocnos in allocation threads.

   The patch was successfully bootstrapped and tested on x86-64.  Its
effect on x86-64 SPEC2000 rates and code size is insignificant.

   Committed as rev. 256590.
diff mbox series

Patch

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 256589)
+++ ChangeLog	(working copy)
@@ -1,3 +1,11 @@ 
+2018-01-12  Vladimir Makarov  <vmakarov@redhat.com>
+
+	PR rtl-optimization/80481
+	* ira-color.c (get_cap_member): New function.
+	(allocnos_conflict_by_live_ranges_p): Use it.
+	(slot_coalesced_allocno_live_ranges_intersect_p): Add assert.
+	(setup_slot_coalesced_allocno_live_ranges): Ditto.
+
 2018-01-12  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/83628
Index: ira-color.c
===================================================================
--- ira-color.c	(revision 256350)
+++ ira-color.c	(working copy)
@@ -1905,6 +1905,18 @@  assign_hard_reg (ira_allocno_t a, bool r
 /* An array used to sort copies.  */
 static ira_copy_t *sorted_copies;
 
+/* If allocno A is a cap, return the non-cap allocno from which A was
+   created.  Otherwise, return A.  */
+static ira_allocno_t
+get_cap_member (ira_allocno_t a)
+{
+  ira_allocno_t member;
+  
+  while ((member = ALLOCNO_CAP_MEMBER (a)) != NULL)
+    a = member;
+  return a;
+}
+
 /* Return TRUE if live ranges of allocnos A1 and A2 intersect.  It is
    used to find a conflict for new allocnos or allocnos with the
    different allocno classes.  */
@@ -1924,6 +1936,10 @@  allocnos_conflict_by_live_ranges_p (ira_
       && ORIGINAL_REGNO (reg1) == ORIGINAL_REGNO (reg2))
     return false;
 
+  /* We don't keep live ranges for caps because they can be quite big.
+     Use ranges of non-cap allocno from which caps are created.  */
+  a1 = get_cap_member (a1);
+  a2 = get_cap_member (a2);
   for (i = 0; i < n1; i++)
     {
       ira_object_t c1 = ALLOCNO_OBJECT (a1, i);
@@ -4027,7 +4043,7 @@  slot_coalesced_allocno_live_ranges_inter
     {
       int i;
       int nr = ALLOCNO_NUM_OBJECTS (a);
-
+      gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
       for (i = 0; i < nr; i++)
 	{
 	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
@@ -4057,6 +4073,7 @@  setup_slot_coalesced_allocno_live_ranges
        a = ALLOCNO_COALESCE_DATA (a)->next)
     {
       int nr = ALLOCNO_NUM_OBJECTS (a);
+      gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
       for (i = 0; i < nr; i++)
 	{
 	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 256589)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@ 
+2018-01-12  Vladimir Makarov  <vmakarov@redhat.com>
+
+	PR rtl-optimization/80481
+	* g++.dg/pr80481.C: New.
+
 2018-01-12  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/83628
Index: testsuite/g++.dg/pr80481.C
===================================================================
--- testsuite/g++.dg/pr80481.C	(nonexistent)
+++ testsuite/g++.dg/pr80481.C	(working copy)
@@ -0,0 +1,70 @@ 
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" }
+// { dg-final { scan-assembler-not "vmovaps" } }
+
+#include <math.h>
+
+#include <xmmintrin.h>
+
+#define max(a, b)   ( (a) > (b) ? (a) : (b) )
+
+struct Sdata {
+  float w; 
+  float s;
+  float r;
+  float t;
+  float v;
+};
+ extern int N1, N2, N3;
+
+#define func(p, up, down) ((p)*(up) + (1.0f-(p)) * (down))
+ 
+void foo (Sdata *in, int idx, float *out)
+{
+  float* y1 = (float*)_mm_malloc(sizeof(float) * N1,16);
+  float* y2  = (float*)_mm_malloc(sizeof(float) * N1,16);
+  float* y3  = (float*)_mm_malloc(sizeof(float) * N1,16);
+  float* y4  = (float*)_mm_malloc(sizeof(float) * N1,16); 
+
+  for (int k = idx; k < idx + N3; k++) {
+    float x1 = in[k].r;
+    float x2    = in[k].s;
+    float x3      = in[k].w;
+    float x4     = in[k].v;
+    float x5         = in[k].t;
+    x5 /= N2;
+    float u = exp(x4 * sqrt(x5));
+    float d = exp(-x4 * sqrt(x5));
+    float a = exp(x1 * x5);
+    float m = exp(-x1 * x5);
+    float p = (a - d) / (u - d);
+    y2[0] = x2;
+    y3[0] = float(1.f);
+    for (int i = 1; i <= N2; i++) {
+      y2[i] = u * y2[i - 1];
+      y3[i] = d * y3[i - 1];
+    }
+#pragma omp simd
+    for (int i = 0; i <= N2; i++) {
+      y1[i] =
+        max((x3 - y2[N2 - i] * y3[i]), float(0.f));
+    }
+    for (int i = N2 - 1; i >= 0; i--) {
+#pragma omp simd
+      for (int j = 0; j <= i; j++) {
+        y4[j] = func(p,y1[j],y1[j+1]) * m;
+      }
+#pragma omp simd
+      for (int j = 0; j <= i; j++) {
+        float t1 = y2[i - j] * y3[j];
+        float t2 = max(x3 - t1, float(0.f));
+        y1[j] = max(t2, y4[j]);
+      }
+    }
+    out[k] = y1[0];
+  }
+  _mm_free(y1);
+  _mm_free(y2);
+  _mm_free(y3);
+  _mm_free(y4);
+}