Patchwork Fix GCC bug causing bootstrap failure with vectorizer turned on

login
register
mail settings
Submitter Cong Hou
Date July 12, 2013, 10:13 p.m.
Message ID <CAK=A3=3vpmrMex1++0p54Z_8f6fHATDLZE+1tAWhKBtXdpdi+g@mail.gmail.com>
Download mbox | patch
Permalink /patch/258813/
State New
Headers show

Comments

Cong Hou - July 12, 2013, 10:13 p.m.
GCC bootstrap failed with loop vectorizer turned on by default at O2.
The symptom is that the comparison between stage2&3 compilers fails.
The root cause is a bug in the file "tree-vect-data-refs.c", where a
qsort() function call is used to sort a group of data references using
a comparison function called "dr_group_sort_cmp()". In this function,
the iterative hash values of tree nodes are used for comparisons. For
a declaration tree node, its UID participates in the calculation of
the hash value. However, a specific declaration may have different
UIDs whether the debug information is switched on/off (-gtoggle). In
consequence, the results of comparisons may vary in stage 2&3 during
bootstrapping.

The following patch fixed the bug. Compiler bootstraps and there is no
regressions in regression test. Compiler also bootstraps fine when
turning on vectorizer by default. Since this patch may produce
difference result (but still correct) than before due to the
modification to the comparison function, four test cases are adjusted
accordingly. OK for trunk?


Cong



stmt UID.  */
Richard Guenther - July 13, 2013, 9:06 a.m.
Cong Hou <congh@google.com> wrote:

>GCC bootstrap failed with loop vectorizer turned on by default at O2.
>The symptom is that the comparison between stage2&3 compilers fails.
>The root cause is a bug in the file "tree-vect-data-refs.c", where a
>qsort() function call is used to sort a group of data references using
>a comparison function called "dr_group_sort_cmp()". In this function,
>the iterative hash values of tree nodes are used for comparisons. For
>a declaration tree node, its UID participates in the calculation of
>the hash value. However, a specific declaration may have different
>UIDs whether the debug information is switched on/off (-gtoggle). In
>consequence, the results of comparisons may vary in stage 2&3 during
>bootstrapping.
>
>The following patch fixed the bug. Compiler bootstraps and there is no
>regressions in regression test. Compiler also bootstraps fine when
>turning on vectorizer by default. Since this patch may produce
>difference result (but still correct) than before due to the
>modification to the comparison function, four test cases are adjusted
>accordingly. OK for trunk?

Where does the order of DRs affect code generation? I think there is the correct place to fix this bug.

Richard.

>
>Cong
>
>
>
>Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c
>===================================================================
>--- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893)
>+++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy)
>@@ -10,9 +10,9 @@
> #include "l_fma_1.h"
>
> /* { dg-final { scan-assembler-times "vfmadd132pd" 4  } } */
>-/* { dg-final { scan-assembler-times "vfmadd213pd" 4  } } */
>+/* { dg-final { scan-assembler-times "vfmadd231pd" 4  } } */
> /* { dg-final { scan-assembler-times "vfmsub132pd" 4  } } */
>-/* { dg-final { scan-assembler-times "vfmsub213pd" 4  } } */
>+/* { dg-final { scan-assembler-times "vfmsub231pd" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmadd132pd" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
>Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c
>===================================================================
>--- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893)
>+++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy)
>@@ -9,9 +9,9 @@
> #include "l_fma_1.h"
>
> /* { dg-final { scan-assembler-times "vfmadd132ps" 4  } } */
>-/* { dg-final { scan-assembler-times "vfmadd213ps" 4  } } */
>+/* { dg-final { scan-assembler-times "vfmadd231ps" 4  } } */
> /* { dg-final { scan-assembler-times "vfmsub132ps" 4  } } */
>-/* { dg-final { scan-assembler-times "vfmsub213ps" 4  } } */
>+/* { dg-final { scan-assembler-times "vfmsub231ps" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmadd132ps" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
>Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c
>===================================================================
>--- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893)
>+++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy)
>@@ -10,9 +10,9 @@
> #include "l_fma_3.h"
>
> /* { dg-final { scan-assembler-times "vfmadd132pd" 4  } } */
>-/* { dg-final { scan-assembler-times "vfmadd213pd" 4  } } */
>+/* { dg-final { scan-assembler-times "vfmadd231pd" 4  } } */
> /* { dg-final { scan-assembler-times "vfmsub132pd" 4  } } */
>-/* { dg-final { scan-assembler-times "vfmsub213pd" 4  } } */
>+/* { dg-final { scan-assembler-times "vfmsub231pd" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmadd132pd" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
>Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c
>===================================================================
>--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893)
>+++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy)
>@@ -9,9 +9,9 @@
> #include "l_fma_3.h"
>
> /* { dg-final { scan-assembler-times "vfmadd132ps" 4  } } */
>-/* { dg-final { scan-assembler-times "vfmadd213ps" 4  } } */
>+/* { dg-final { scan-assembler-times "vfmadd231ps" 4  } } */
> /* { dg-final { scan-assembler-times "vfmsub132ps" 4  } } */
>-/* { dg-final { scan-assembler-times "vfmsub213ps" 4  } } */
>+/* { dg-final { scan-assembler-times "vfmsub231ps" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmadd132ps" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
> /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
>Index: gcc/tree-vect-data-refs.c
>===================================================================
>--- gcc/tree-vect-data-refs.c (revision 200893)
>+++ gcc/tree-vect-data-refs.c (working copy)
>@@ -2311,6 +2311,80 @@
>   return vect_analyze_group_access (dr);
> }
>
>+
>+
>+/* Compare two tree nodes. This function is used to compare two
>+   data-references as below. */
>+
>+static int
>+compare_tree (tree t1, tree t2)
>+{
>+  int i, cmp;
>+  enum tree_code code;
>+  char tclass;
>+
>+  if (t1 == t2)
>+    return 0;
>+  if (t1 == NULL)
>+    return -1;
>+  if (t2 == NULL)
>+    return 1;
>+
>+
>+  if (TREE_CODE (t1) != TREE_CODE (t2))
>+    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
>+
>+  code = TREE_CODE (t1);
>+  switch (code)
>+    {
>+    /* For const values, we can just use hash values for comparisons. 
>*/
>+    case INTEGER_CST:
>+    case REAL_CST:
>+    case FIXED_CST:
>+    case STRING_CST:
>+    case COMPLEX_CST:
>+    case VECTOR_CST:
>+      {
>+ hashval_t h1 = iterative_hash_expr (t1, 0);
>+ hashval_t h2 = iterative_hash_expr (t2, 0);
>+ if (h1 != h2)
>+  return h1 < h2 ? -1 : 1;
>+ break;
>+      }
>+
>+    case SSA_NAME:
>+      cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
>+      if (cmp != 0)
>+ return cmp;
>+
>+      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
>+ return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
>+      break;
>+
>+    default:
>+      tclass = TREE_CODE_CLASS (code);
>+
>+      /* For var-decl, we could compare their UIDs.  */
>+      if (tclass == tcc_declaration)
>+ {
>+  if (DECL_UID (t1) != DECL_UID (t2))
>+    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
>+  break;
>+ }
>+
>+      /* For expressions with operands, compare their operands
>recursively.  */
>+      for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
>+ {
>+  cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
>+  if (cmp != 0)
>+    return cmp;
>+ }
>+    }
>+
>+  return 0;
>+}
>+
>+
> /* Compare two data-references DRA and DRB to group them into chunks
>    suitable for grouping.  */
>
>@@ -2319,7 +2393,6 @@
> {
>  data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
>  data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
>-  hashval_t h1, h2;
>   int cmp;
>
>   /* Stabilize sort.  */
>@@ -2329,19 +2402,17 @@
>   /* Ordering of DRs according to base.  */
>if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
>     {
>-      h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0);
>-      h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0);
>-      if (h1 != h2)
>- return h1 < h2 ? -1 : 1;
>+      cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS
>(drb));
>+      if (cmp != 0)
>+        return cmp;
>     }
>
>   /* And according to DR_OFFSET.  */
>   if (!dr_equal_offsets_p (dra, drb))
>     {
>-      h1 = iterative_hash_expr (DR_OFFSET (dra), 0);
>-      h2 = iterative_hash_expr (DR_OFFSET (drb), 0);
>-      if (h1 != h2)
>- return h1 < h2 ? -1 : 1;
>+      cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
>+      if (cmp != 0)
>+        return cmp;
>     }
>
>   /* Put reads before writes.  */
>@@ -2352,19 +2423,18 @@
>   if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
>  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
>     {
>-      h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
>(dra))), 0);
>-      h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
>(drb))), 0);
>-      if (h1 != h2)
>- return h1 < h2 ? -1 : 1;
>+      cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
>+                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
>+      if (cmp != 0)
>+        return cmp;
>     }
>
>   /* And after step.  */
>   if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
>     {
>-      h1 = iterative_hash_expr (DR_STEP (dra), 0);
>-      h2 = iterative_hash_expr (DR_STEP (drb), 0);
>-      if (h1 != h2)
>- return h1 < h2 ? -1 : 1;
>+      cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
>+      if (cmp != 0)
>+        return cmp;
>     }
>
>   /* Then sort after DR_INIT.  In case of identical DRs sort after
>stmt UID.  */
Richard Guenther - July 15, 2013, 6:05 p.m.
Cong Hou <congh@google.com> wrote:

>Hi Richard
>
>Thank you for you reply. The code generation is affected when
>generating
>conditions for alias checks using the function
>
>static void vect_create_cond_for_alias_checks (loop_vec_info
>loop_vinfo,
>tree * cond_expr)
>
>from the file
>
>*tree-vect-loop-manip.c*
>
>where a loop inside iterates a set of DRs and builds conditions for
>each
>one. However, those DRs are already sorted by the function

Hm, ok.

>bool vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo,
>bb_vec_info
>bb_vinfo)
>
>from the file
>
>*tree-vect-data-refs.c*
>
>and as I mentioned the comparison function used in the sort has some
>problems and our patch is trying to fix it.

Yes, I know - I introduced the lame compare using the hashes...

>If you have any other problem please let me know.

Not at this time.  The patch is ok as-is.

Thanks,
Richard.

>Thank you!
>*
>*
>*
>*
>Cong
>
>
>On Sat, Jul 13, 2013 at 2:06 AM, Richard Biener
><richard.guenther@gmail.com>wrote:
>
>> Cong Hou <congh@google.com> wrote:
>>
>> >GCC bootstrap failed with loop vectorizer turned on by default at
>O2.
>> >The symptom is that the comparison between stage2&3 compilers fails.
>> >The root cause is a bug in the file "tree-vect-data-refs.c", where a
>> >qsort() function call is used to sort a group of data references
>using
>> >a comparison function called "dr_group_sort_cmp()". In this
>function,
>> >the iterative hash values of tree nodes are used for comparisons.
>For
>> >a declaration tree node, its UID participates in the calculation of
>> >the hash value. However, a specific declaration may have different
>> >UIDs whether the debug information is switched on/off (-gtoggle). In
>> >consequence, the results of comparisons may vary in stage 2&3 during
>> >bootstrapping.
>> >
>> >The following patch fixed the bug. Compiler bootstraps and there is
>no
>> >regressions in regression test. Compiler also bootstraps fine when
>> >turning on vectorizer by default. Since this patch may produce
>> >difference result (but still correct) than before due to the
>> >modification to the comparison function, four test cases are
>adjusted
>> >accordingly. OK for trunk?
>>
>> Where does the order of DRs affect code generation? I think there is
>the
>> correct place to fix this bug.
>>
>> Richard.
>>
>> >
>> >Cong
>> >
>> >
>> >
>> >Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c
>> >===================================================================
>> >--- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893)
>> >+++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy)
>> >@@ -10,9 +10,9 @@
>> > #include "l_fma_1.h"
>> >
>> > /* { dg-final { scan-assembler-times "vfmadd132pd" 4  } } */
>> >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4  } } */
>> >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfmsub132pd" 4  } } */
>> >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4  } } */
>> >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
>> >Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c
>> >===================================================================
>> >--- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893)
>> >+++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy)
>> >@@ -9,9 +9,9 @@
>> > #include "l_fma_1.h"
>> >
>> > /* { dg-final { scan-assembler-times "vfmadd132ps" 4  } } */
>> >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4  } } */
>> >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfmsub132ps" 4  } } */
>> >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4  } } */
>> >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
>> >Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c
>> >===================================================================
>> >--- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893)
>> >+++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy)
>> >@@ -10,9 +10,9 @@
>> > #include "l_fma_3.h"
>> >
>> > /* { dg-final { scan-assembler-times "vfmadd132pd" 4  } } */
>> >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4  } } */
>> >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfmsub132pd" 4  } } */
>> >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4  } } */
>> >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
>> >Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c
>> >===================================================================
>> >--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893)
>> >+++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy)
>> >@@ -9,9 +9,9 @@
>> > #include "l_fma_3.h"
>> >
>> > /* { dg-final { scan-assembler-times "vfmadd132ps" 4  } } */
>> >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4  } } */
>> >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfmsub132ps" 4  } } */
>> >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4  } } */
>> >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
>> > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
>> >Index: gcc/tree-vect-data-refs.c
>> >===================================================================
>> >--- gcc/tree-vect-data-refs.c (revision 200893)
>> >+++ gcc/tree-vect-data-refs.c (working copy)
>> >@@ -2311,6 +2311,80 @@
>> >   return vect_analyze_group_access (dr);
>> > }
>> >
>> >+
>> >+
>> >+/* Compare two tree nodes. This function is used to compare two
>> >+   data-references as below. */
>> >+
>> >+static int
>> >+compare_tree (tree t1, tree t2)
>> >+{
>> >+  int i, cmp;
>> >+  enum tree_code code;
>> >+  char tclass;
>> >+
>> >+  if (t1 == t2)
>> >+    return 0;
>> >+  if (t1 == NULL)
>> >+    return -1;
>> >+  if (t2 == NULL)
>> >+    return 1;
>> >+
>> >+
>> >+  if (TREE_CODE (t1) != TREE_CODE (t2))
>> >+    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
>> >+
>> >+  code = TREE_CODE (t1);
>> >+  switch (code)
>> >+    {
>> >+    /* For const values, we can just use hash values for
>comparisons.
>> >*/
>> >+    case INTEGER_CST:
>> >+    case REAL_CST:
>> >+    case FIXED_CST:
>> >+    case STRING_CST:
>> >+    case COMPLEX_CST:
>> >+    case VECTOR_CST:
>> >+      {
>> >+ hashval_t h1 = iterative_hash_expr (t1, 0);
>> >+ hashval_t h2 = iterative_hash_expr (t2, 0);
>> >+ if (h1 != h2)
>> >+  return h1 < h2 ? -1 : 1;
>> >+ break;
>> >+      }
>> >+
>> >+    case SSA_NAME:
>> >+      cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
>> >+      if (cmp != 0)
>> >+ return cmp;
>> >+
>> >+      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
>> >+ return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
>> >+      break;
>> >+
>> >+    default:
>> >+      tclass = TREE_CODE_CLASS (code);
>> >+
>> >+      /* For var-decl, we could compare their UIDs.  */
>> >+      if (tclass == tcc_declaration)
>> >+ {
>> >+  if (DECL_UID (t1) != DECL_UID (t2))
>> >+    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
>> >+  break;
>> >+ }
>> >+
>> >+      /* For expressions with operands, compare their operands
>> >recursively.  */
>> >+      for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
>> >+ {
>> >+  cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
>> >+  if (cmp != 0)
>> >+    return cmp;
>> >+ }
>> >+    }
>> >+
>> >+  return 0;
>> >+}
>> >+
>> >+
>> > /* Compare two data-references DRA and DRB to group them into
>chunks
>> >    suitable for grouping.  */
>> >
>> >@@ -2319,7 +2393,6 @@
>> > {
>> >  data_reference_p dra = *(data_reference_p *)const_cast<void
>*>(dra_);
>> >  data_reference_p drb = *(data_reference_p *)const_cast<void
>*>(drb_);
>> >-  hashval_t h1, h2;
>> >   int cmp;
>> >
>> >   /* Stabilize sort.  */
>> >@@ -2329,19 +2402,17 @@
>> >   /* Ordering of DRs according to base.  */
>> >if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb),
>0))
>> >     {
>> >-      h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0);
>> >-      h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0);
>> >-      if (h1 != h2)
>> >- return h1 < h2 ? -1 : 1;
>> >+      cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS
>> >(drb));
>> >+      if (cmp != 0)
>> >+        return cmp;
>> >     }
>> >
>> >   /* And according to DR_OFFSET.  */
>> >   if (!dr_equal_offsets_p (dra, drb))
>> >     {
>> >-      h1 = iterative_hash_expr (DR_OFFSET (dra), 0);
>> >-      h2 = iterative_hash_expr (DR_OFFSET (drb), 0);
>> >-      if (h1 != h2)
>> >- return h1 < h2 ? -1 : 1;
>> >+      cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
>> >+      if (cmp != 0)
>> >+        return cmp;
>> >     }
>> >
>> >   /* Put reads before writes.  */
>> >@@ -2352,19 +2423,18 @@
>> >   if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
>> >  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
>> >     {
>> >-      h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
>> >(dra))), 0);
>> >-      h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
>> >(drb))), 0);
>> >-      if (h1 != h2)
>> >- return h1 < h2 ? -1 : 1;
>> >+      cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
>(dra))),
>> >+                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF
>(drb))));
>> >+      if (cmp != 0)
>> >+        return cmp;
>> >     }
>> >
>> >   /* And after step.  */
>> >   if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
>> >     {
>> >-      h1 = iterative_hash_expr (DR_STEP (dra), 0);
>> >-      h2 = iterative_hash_expr (DR_STEP (drb), 0);
>> >-      if (h1 != h2)
>> >- return h1 < h2 ? -1 : 1;
>> >+      cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
>> >+      if (cmp != 0)
>> >+        return cmp;
>> >     }
>> >
>> >   /* Then sort after DR_INIT.  In case of identical DRs sort after
>> >stmt UID.  */
>>
>>
>>
Jeff Law - July 18, 2013, 4:45 p.m.
On 07/12/2013 04:13 PM, Cong Hou wrote:
> GCC bootstrap failed with loop vectorizer turned on by default at O2.
> The symptom is that the comparison between stage2&3 compilers fails.
> The root cause is a bug in the file "tree-vect-data-refs.c", where a
> qsort() function call is used to sort a group of data references using
> a comparison function called "dr_group_sort_cmp()". In this function,
> the iterative hash values of tree nodes are used for comparisons. For
> a declaration tree node, its UID participates in the calculation of
> the hash value. However, a specific declaration may have different
> UIDs whether the debug information is switched on/off (-gtoggle). In
> consequence, the results of comparisons may vary in stage 2&3 during
> bootstrapping.
>
> The following patch fixed the bug. Compiler bootstraps and there is no
> regressions in regression test. Compiler also bootstraps fine when
> turning on vectorizer by default. Since this patch may produce
> difference result (but still correct) than before due to the
> modification to the comparison function, four test cases are adjusted
> accordingly. OK for trunk?
If I understand you correctly, you're claiming that the DECL_UID can 
vary based on whether or not we're emitting debug info.  This can be 
seen in iterative_hash_expr:


    default:
       tclass = TREE_CODE_CLASS (code);

       if (tclass == tcc_declaration)
         {
           /* DECL's have a unique ID */
           val = iterative_hash_host_wide_int (DECL_UID (t), val);
         }


What doesn't make sense to me is your compare_tree looks at the UIDs 
just as well:



> +
> +    default:
> +      tclass = TREE_CODE_CLASS (code);
> +
> +      /* For var-decl, we could compare their UIDs.  */
> +      if (tclass == tcc_declaration)
> + {
> +  if (DECL_UID (t1) != DECL_UID (t2))
> +    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
> +  break;
> + }
> +
>


Why does this work while using iterative_hash_expr fail?

Clearly I'm mis-understanding something.

jeff
Xinliang David Li - July 18, 2013, 4:49 p.m.
The difference is that the relative order of DECL_UIDs do not change
whether debug info is on or not, but there is no such guarantee when
hashing is involved.

David

On Thu, Jul 18, 2013 at 9:45 AM, Jeff Law <law@redhat.com> wrote:
> On 07/12/2013 04:13 PM, Cong Hou wrote:
>>
>> GCC bootstrap failed with loop vectorizer turned on by default at O2.
>> The symptom is that the comparison between stage2&3 compilers fails.
>> The root cause is a bug in the file "tree-vect-data-refs.c", where a
>> qsort() function call is used to sort a group of data references using
>> a comparison function called "dr_group_sort_cmp()". In this function,
>> the iterative hash values of tree nodes are used for comparisons. For
>> a declaration tree node, its UID participates in the calculation of
>> the hash value. However, a specific declaration may have different
>> UIDs whether the debug information is switched on/off (-gtoggle). In
>> consequence, the results of comparisons may vary in stage 2&3 during
>> bootstrapping.
>>
>> The following patch fixed the bug. Compiler bootstraps and there is no
>> regressions in regression test. Compiler also bootstraps fine when
>> turning on vectorizer by default. Since this patch may produce
>> difference result (but still correct) than before due to the
>> modification to the comparison function, four test cases are adjusted
>> accordingly. OK for trunk?
>
> If I understand you correctly, you're claiming that the DECL_UID can vary
> based on whether or not we're emitting debug info.  This can be seen in
> iterative_hash_expr:
>
>
>    default:
>       tclass = TREE_CODE_CLASS (code);
>
>       if (tclass == tcc_declaration)
>         {
>           /* DECL's have a unique ID */
>           val = iterative_hash_host_wide_int (DECL_UID (t), val);
>         }
>
>
> What doesn't make sense to me is your compare_tree looks at the UIDs just as
> well:
>
>
>
>
>> +
>> +    default:
>> +      tclass = TREE_CODE_CLASS (code);
>> +
>> +      /* For var-decl, we could compare their UIDs.  */
>> +      if (tclass == tcc_declaration)
>> + {
>> +  if (DECL_UID (t1) != DECL_UID (t2))
>> +    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
>> +  break;
>> + }
>> +
>>
>
>
> Why does this work while using iterative_hash_expr fail?
>
> Clearly I'm mis-understanding something.
>
> jeff
Jakub Jelinek - July 18, 2013, 5:18 p.m.
On Thu, Jul 18, 2013 at 10:45:19AM -0600, Jeff Law wrote:
> If I understand you correctly, you're claiming that the DECL_UID can
> vary based on whether or not we're emitting debug info.  This can be
> seen in iterative_hash_expr:

Yes, for e.g. SSA_NAME_VERSION, we require that it is the same in between
-g -fvar-tracking-assignments and -g0, for DECL_UIDs, only the relative
ordering must be preserved, as in, for VTA the gaps between DECL_UID
of decls seen both in -g0 and -g code can be bigger.
So -g0 can use for decls a, c, b uids say 12 13 14, while -g can use
12 27 39.  Thus hashing the DECL_UID if e.g. the hash table is later on
traversed could lead to -fcompare-debug failures.

	Jakub
Jeff Law - July 19, 2013, 12:45 p.m.
On 07/18/2013 10:49 AM, Xinliang David Li wrote:
> The difference is that the relative order of DECL_UIDs do not change
> whether debug info is on or not, but there is no such guarantee when
> hashing is involved.
Yea.  I feel like an idiot for missing the hash vs comparison.

Add a ChangeLog and this is good to go.

Jeff
Xinliang David Li - July 19, 2013, 3:57 p.m.
We all missed it when just staring at the code :)

David

On Fri, Jul 19, 2013 at 5:45 AM, Jeff Law <law@redhat.com> wrote:
> On 07/18/2013 10:49 AM, Xinliang David Li wrote:
>>
>> The difference is that the relative order of DECL_UIDs do not change
>> whether debug info is on or not, but there is no such guarantee when
>> hashing is involved.
>
> Yea.  I feel like an idiot for missing the hash vs comparison.
>
> Add a ChangeLog and this is good to go.
>
> Jeff
>

Patch

Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893)
+++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy)
@@ -10,9 +10,9 @@ 
 #include "l_fma_1.h"

 /* { dg-final { scan-assembler-times "vfmadd132pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd213pd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfmsub132pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub213pd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893)
+++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy)
@@ -9,9 +9,9 @@ 
 #include "l_fma_1.h"

 /* { dg-final { scan-assembler-times "vfmadd132ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd213ps" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfmsub132ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub213ps" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c
===================================================================
--- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893)
+++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy)
@@ -10,9 +10,9 @@ 
 #include "l_fma_3.h"

 /* { dg-final { scan-assembler-times "vfmadd132pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd213pd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfmsub132pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub213pd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c
===================================================================
--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893)
+++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy)
@@ -9,9 +9,9 @@ 
 #include "l_fma_3.h"

 /* { dg-final { scan-assembler-times "vfmadd132ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd213ps" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfmsub132ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub213ps" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c (revision 200893)
+++ gcc/tree-vect-data-refs.c (working copy)
@@ -2311,6 +2311,80 @@ 
   return vect_analyze_group_access (dr);
 }

+
+
+/* Compare two tree nodes. This function is used to compare two
+   data-references as below. */
+
+static int
+compare_tree (tree t1, tree t2)
+{
+  int i, cmp;
+  enum tree_code code;
+  char tclass;
+
+  if (t1 == t2)
+    return 0;
+  if (t1 == NULL)
+    return -1;
+  if (t2 == NULL)
+    return 1;
+
+
+  if (TREE_CODE (t1) != TREE_CODE (t2))
+    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
+
+  code = TREE_CODE (t1);
+  switch (code)
+    {
+    /* For const values, we can just use hash values for comparisons.  */
+    case INTEGER_CST:
+    case REAL_CST:
+    case FIXED_CST:
+    case STRING_CST:
+    case COMPLEX_CST:
+    case VECTOR_CST:
+      {
+ hashval_t h1 = iterative_hash_expr (t1, 0);
+ hashval_t h2 = iterative_hash_expr (t2, 0);
+ if (h1 != h2)
+  return h1 < h2 ? -1 : 1;
+ break;
+      }
+
+    case SSA_NAME:
+      cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
+      if (cmp != 0)
+ return cmp;
+
+      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
+ return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
+      break;
+
+    default:
+      tclass = TREE_CODE_CLASS (code);
+
+      /* For var-decl, we could compare their UIDs.  */
+      if (tclass == tcc_declaration)
+ {
+  if (DECL_UID (t1) != DECL_UID (t2))
+    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
+  break;
+ }
+
+      /* For expressions with operands, compare their operands recursively.  */
+      for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
+ {
+  cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
+  if (cmp != 0)
+    return cmp;
+ }
+    }
+
+  return 0;
+}
+
+
 /* Compare two data-references DRA and DRB to group them into chunks
    suitable for grouping.  */

@@ -2319,7 +2393,6 @@ 
 {
   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
-  hashval_t h1, h2;
   int cmp;

   /* Stabilize sort.  */
@@ -2329,19 +2402,17 @@ 
   /* Ordering of DRs according to base.  */
   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
     {
-      h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0);
-      h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0);
-      if (h1 != h2)
- return h1 < h2 ? -1 : 1;
+      cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb));
+      if (cmp != 0)
+        return cmp;
     }

   /* And according to DR_OFFSET.  */
   if (!dr_equal_offsets_p (dra, drb))
     {
-      h1 = iterative_hash_expr (DR_OFFSET (dra), 0);
-      h2 = iterative_hash_expr (DR_OFFSET (drb), 0);
-      if (h1 != h2)
- return h1 < h2 ? -1 : 1;
+      cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
+      if (cmp != 0)
+        return cmp;
     }

   /* Put reads before writes.  */
@@ -2352,19 +2423,18 @@ 
   if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
     {
-      h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), 0);
-      h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0);
-      if (h1 != h2)
- return h1 < h2 ? -1 : 1;
+      cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
+                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
+      if (cmp != 0)
+        return cmp;
     }

   /* And after step.  */
   if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
     {
-      h1 = iterative_hash_expr (DR_STEP (dra), 0);
-      h2 = iterative_hash_expr (DR_STEP (drb), 0);
-      if (h1 != h2)
- return h1 < h2 ? -1 : 1;
+      cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
+      if (cmp != 0)
+        return cmp;
     }

   /* Then sort after DR_INIT.  In case of identical DRs sort after