Message ID | CAK=A3=3vpmrMex1++0p54Z_8f6fHATDLZE+1tAWhKBtXdpdi+g@mail.gmail.com |
---|---|
State | New |
Headers | show |
Cong Hou <congh@google.com> wrote: >GCC bootstrap failed with loop vectorizer turned on by default at O2. >The symptom is that the comparison between stage2&3 compilers fails. >The root cause is a bug in the file "tree-vect-data-refs.c", where a >qsort() function call is used to sort a group of data references using >a comparison function called "dr_group_sort_cmp()". In this function, >the iterative hash values of tree nodes are used for comparisons. For >a declaration tree node, its UID participates in the calculation of >the hash value. However, a specific declaration may have different >UIDs whether the debug information is switched on/off (-gtoggle). In >consequence, the results of comparisons may vary in stage 2&3 during >bootstrapping. > >The following patch fixed the bug. Compiler bootstraps and there is no >regressions in regression test. Compiler also bootstraps fine when >turning on vectorizer by default. Since this patch may produce >difference result (but still correct) than before due to the >modification to the comparison function, four test cases are adjusted >accordingly. OK for trunk? Where does the order of DRs affect code generation? I think there is the correct place to fix this bug. Richard. > >Cong > > > >Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c >=================================================================== >--- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893) >+++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy) >@@ -10,9 +10,9 @@ > #include "l_fma_1.h" > > /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ >Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c >=================================================================== >--- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893) >+++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy) >@@ -9,9 +9,9 @@ > #include "l_fma_1.h" > > /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ >Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c >=================================================================== >--- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893) >+++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy) >@@ -10,9 +10,9 @@ > #include "l_fma_3.h" > > /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ >Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c >=================================================================== >--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893) >+++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy) >@@ -9,9 +9,9 @@ > #include "l_fma_3.h" > > /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ >Index: gcc/tree-vect-data-refs.c >=================================================================== >--- gcc/tree-vect-data-refs.c (revision 200893) >+++ gcc/tree-vect-data-refs.c (working copy) >@@ -2311,6 +2311,80 @@ > return vect_analyze_group_access (dr); > } > >+ >+ >+/* Compare two tree nodes. This function is used to compare two >+ data-references as below. */ >+ >+static int >+compare_tree (tree t1, tree t2) >+{ >+ int i, cmp; >+ enum tree_code code; >+ char tclass; >+ >+ if (t1 == t2) >+ return 0; >+ if (t1 == NULL) >+ return -1; >+ if (t2 == NULL) >+ return 1; >+ >+ >+ if (TREE_CODE (t1) != TREE_CODE (t2)) >+ return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1; >+ >+ code = TREE_CODE (t1); >+ switch (code) >+ { >+ /* For const values, we can just use hash values for comparisons. >*/ >+ case INTEGER_CST: >+ case REAL_CST: >+ case FIXED_CST: >+ case STRING_CST: >+ case COMPLEX_CST: >+ case VECTOR_CST: >+ { >+ hashval_t h1 = iterative_hash_expr (t1, 0); >+ hashval_t h2 = iterative_hash_expr (t2, 0); >+ if (h1 != h2) >+ return h1 < h2 ? -1 : 1; >+ break; >+ } >+ >+ case SSA_NAME: >+ cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2)); >+ if (cmp != 0) >+ return cmp; >+ >+ if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2)) >+ return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1; >+ break; >+ >+ default: >+ tclass = TREE_CODE_CLASS (code); >+ >+ /* For var-decl, we could compare their UIDs. */ >+ if (tclass == tcc_declaration) >+ { >+ if (DECL_UID (t1) != DECL_UID (t2)) >+ return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; >+ break; >+ } >+ >+ /* For expressions with operands, compare their operands >recursively. */ >+ for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i) >+ { >+ cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i)); >+ if (cmp != 0) >+ return cmp; >+ } >+ } >+ >+ return 0; >+} >+ >+ > /* Compare two data-references DRA and DRB to group them into chunks > suitable for grouping. */ > >@@ -2319,7 +2393,6 @@ > { > data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); > data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); >- hashval_t h1, h2; > int cmp; > > /* Stabilize sort. */ >@@ -2329,19 +2402,17 @@ > /* Ordering of DRs according to base. */ >if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)) > { >- h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0); >- h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0); >- if (h1 != h2) >- return h1 < h2 ? -1 : 1; >+ cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS >(drb)); >+ if (cmp != 0) >+ return cmp; > } > > /* And according to DR_OFFSET. */ > if (!dr_equal_offsets_p (dra, drb)) > { >- h1 = iterative_hash_expr (DR_OFFSET (dra), 0); >- h2 = iterative_hash_expr (DR_OFFSET (drb), 0); >- if (h1 != h2) >- return h1 < h2 ? -1 : 1; >+ cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); >+ if (cmp != 0) >+ return cmp; > } > > /* Put reads before writes. */ >@@ -2352,19 +2423,18 @@ > if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), > TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0)) > { >- h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >(dra))), 0); >- h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >(drb))), 0); >- if (h1 != h2) >- return h1 < h2 ? -1 : 1; >+ cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), >+ TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); >+ if (cmp != 0) >+ return cmp; > } > > /* And after step. */ > if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) > { >- h1 = iterative_hash_expr (DR_STEP (dra), 0); >- h2 = iterative_hash_expr (DR_STEP (drb), 0); >- if (h1 != h2) >- return h1 < h2 ? -1 : 1; >+ cmp = compare_tree (DR_STEP (dra), DR_STEP (drb)); >+ if (cmp != 0) >+ return cmp; > } > > /* Then sort after DR_INIT. In case of identical DRs sort after >stmt UID. */
Cong Hou <congh@google.com> wrote: >Hi Richard > >Thank you for you reply. The code generation is affected when >generating >conditions for alias checks using the function > >static void vect_create_cond_for_alias_checks (loop_vec_info >loop_vinfo, >tree * cond_expr) > >from the file > >*tree-vect-loop-manip.c* > >where a loop inside iterates a set of DRs and builds conditions for >each >one. However, those DRs are already sorted by the function Hm, ok. >bool vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, >bb_vec_info >bb_vinfo) > >from the file > >*tree-vect-data-refs.c* > >and as I mentioned the comparison function used in the sort has some >problems and our patch is trying to fix it. Yes, I know - I introduced the lame compare using the hashes... >If you have any other problem please let me know. Not at this time. The patch is ok as-is. Thanks, Richard. >Thank you! >* >* >* >* >Cong > > >On Sat, Jul 13, 2013 at 2:06 AM, Richard Biener ><richard.guenther@gmail.com>wrote: > >> Cong Hou <congh@google.com> wrote: >> >> >GCC bootstrap failed with loop vectorizer turned on by default at >O2. >> >The symptom is that the comparison between stage2&3 compilers fails. >> >The root cause is a bug in the file "tree-vect-data-refs.c", where a >> >qsort() function call is used to sort a group of data references >using >> >a comparison function called "dr_group_sort_cmp()". In this >function, >> >the iterative hash values of tree nodes are used for comparisons. >For >> >a declaration tree node, its UID participates in the calculation of >> >the hash value. However, a specific declaration may have different >> >UIDs whether the debug information is switched on/off (-gtoggle). In >> >consequence, the results of comparisons may vary in stage 2&3 during >> >bootstrapping. >> > >> >The following patch fixed the bug. Compiler bootstraps and there is >no >> >regressions in regression test. Compiler also bootstraps fine when >> >turning on vectorizer by default. Since this patch may produce >> >difference result (but still correct) than before due to the >> >modification to the comparison function, four test cases are >adjusted >> >accordingly. OK for trunk? >> >> Where does the order of DRs affect code generation? I think there is >the >> correct place to fix this bug. >> >> Richard. >> >> > >> >Cong >> > >> > >> > >> >Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c >> >=================================================================== >> >--- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893) >> >+++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy) >> >@@ -10,9 +10,9 @@ >> > #include "l_fma_1.h" >> > >> > /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ >> >Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c >> >=================================================================== >> >--- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893) >> >+++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy) >> >@@ -9,9 +9,9 @@ >> > #include "l_fma_1.h" >> > >> > /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ >> >Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c >> >=================================================================== >> >--- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893) >> >+++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy) >> >@@ -10,9 +10,9 @@ >> > #include "l_fma_3.h" >> > >> > /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ >> >Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c >> >=================================================================== >> >--- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893) >> >+++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy) >> >@@ -9,9 +9,9 @@ >> > #include "l_fma_3.h" >> > >> > /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ >> >-/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ >> >+/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ >> > /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ >> >Index: gcc/tree-vect-data-refs.c >> >=================================================================== >> >--- gcc/tree-vect-data-refs.c (revision 200893) >> >+++ gcc/tree-vect-data-refs.c (working copy) >> >@@ -2311,6 +2311,80 @@ >> > return vect_analyze_group_access (dr); >> > } >> > >> >+ >> >+ >> >+/* Compare two tree nodes. This function is used to compare two >> >+ data-references as below. */ >> >+ >> >+static int >> >+compare_tree (tree t1, tree t2) >> >+{ >> >+ int i, cmp; >> >+ enum tree_code code; >> >+ char tclass; >> >+ >> >+ if (t1 == t2) >> >+ return 0; >> >+ if (t1 == NULL) >> >+ return -1; >> >+ if (t2 == NULL) >> >+ return 1; >> >+ >> >+ >> >+ if (TREE_CODE (t1) != TREE_CODE (t2)) >> >+ return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1; >> >+ >> >+ code = TREE_CODE (t1); >> >+ switch (code) >> >+ { >> >+ /* For const values, we can just use hash values for >comparisons. >> >*/ >> >+ case INTEGER_CST: >> >+ case REAL_CST: >> >+ case FIXED_CST: >> >+ case STRING_CST: >> >+ case COMPLEX_CST: >> >+ case VECTOR_CST: >> >+ { >> >+ hashval_t h1 = iterative_hash_expr (t1, 0); >> >+ hashval_t h2 = iterative_hash_expr (t2, 0); >> >+ if (h1 != h2) >> >+ return h1 < h2 ? -1 : 1; >> >+ break; >> >+ } >> >+ >> >+ case SSA_NAME: >> >+ cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2)); >> >+ if (cmp != 0) >> >+ return cmp; >> >+ >> >+ if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2)) >> >+ return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1; >> >+ break; >> >+ >> >+ default: >> >+ tclass = TREE_CODE_CLASS (code); >> >+ >> >+ /* For var-decl, we could compare their UIDs. */ >> >+ if (tclass == tcc_declaration) >> >+ { >> >+ if (DECL_UID (t1) != DECL_UID (t2)) >> >+ return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; >> >+ break; >> >+ } >> >+ >> >+ /* For expressions with operands, compare their operands >> >recursively. */ >> >+ for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i) >> >+ { >> >+ cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i)); >> >+ if (cmp != 0) >> >+ return cmp; >> >+ } >> >+ } >> >+ >> >+ return 0; >> >+} >> >+ >> >+ >> > /* Compare two data-references DRA and DRB to group them into >chunks >> > suitable for grouping. */ >> > >> >@@ -2319,7 +2393,6 @@ >> > { >> > data_reference_p dra = *(data_reference_p *)const_cast<void >*>(dra_); >> > data_reference_p drb = *(data_reference_p *)const_cast<void >*>(drb_); >> >- hashval_t h1, h2; >> > int cmp; >> > >> > /* Stabilize sort. */ >> >@@ -2329,19 +2402,17 @@ >> > /* Ordering of DRs according to base. */ >> >if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), >0)) >> > { >> >- h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0); >> >- h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0); >> >- if (h1 != h2) >> >- return h1 < h2 ? -1 : 1; >> >+ cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS >> >(drb)); >> >+ if (cmp != 0) >> >+ return cmp; >> > } >> > >> > /* And according to DR_OFFSET. */ >> > if (!dr_equal_offsets_p (dra, drb)) >> > { >> >- h1 = iterative_hash_expr (DR_OFFSET (dra), 0); >> >- h2 = iterative_hash_expr (DR_OFFSET (drb), 0); >> >- if (h1 != h2) >> >- return h1 < h2 ? -1 : 1; >> >+ cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); >> >+ if (cmp != 0) >> >+ return cmp; >> > } >> > >> > /* Put reads before writes. */ >> >@@ -2352,19 +2423,18 @@ >> > if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), >> > TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0)) >> > { >> >- h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >> >(dra))), 0); >> >- h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >> >(drb))), 0); >> >- if (h1 != h2) >> >- return h1 < h2 ? -1 : 1; >> >+ cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >(dra))), >> >+ TYPE_SIZE_UNIT (TREE_TYPE (DR_REF >(drb)))); >> >+ if (cmp != 0) >> >+ return cmp; >> > } >> > >> > /* And after step. */ >> > if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) >> > { >> >- h1 = iterative_hash_expr (DR_STEP (dra), 0); >> >- h2 = iterative_hash_expr (DR_STEP (drb), 0); >> >- if (h1 != h2) >> >- return h1 < h2 ? -1 : 1; >> >+ cmp = compare_tree (DR_STEP (dra), DR_STEP (drb)); >> >+ if (cmp != 0) >> >+ return cmp; >> > } >> > >> > /* Then sort after DR_INIT. In case of identical DRs sort after >> >stmt UID. */ >> >> >>
On 07/12/2013 04:13 PM, Cong Hou wrote: > GCC bootstrap failed with loop vectorizer turned on by default at O2. > The symptom is that the comparison between stage2&3 compilers fails. > The root cause is a bug in the file "tree-vect-data-refs.c", where a > qsort() function call is used to sort a group of data references using > a comparison function called "dr_group_sort_cmp()". In this function, > the iterative hash values of tree nodes are used for comparisons. For > a declaration tree node, its UID participates in the calculation of > the hash value. However, a specific declaration may have different > UIDs whether the debug information is switched on/off (-gtoggle). In > consequence, the results of comparisons may vary in stage 2&3 during > bootstrapping. > > The following patch fixed the bug. Compiler bootstraps and there is no > regressions in regression test. Compiler also bootstraps fine when > turning on vectorizer by default. Since this patch may produce > difference result (but still correct) than before due to the > modification to the comparison function, four test cases are adjusted > accordingly. OK for trunk? If I understand you correctly, you're claiming that the DECL_UID can vary based on whether or not we're emitting debug info. This can be seen in iterative_hash_expr: default: tclass = TREE_CODE_CLASS (code); if (tclass == tcc_declaration) { /* DECL's have a unique ID */ val = iterative_hash_host_wide_int (DECL_UID (t), val); } What doesn't make sense to me is your compare_tree looks at the UIDs just as well: > + > + default: > + tclass = TREE_CODE_CLASS (code); > + > + /* For var-decl, we could compare their UIDs. */ > + if (tclass == tcc_declaration) > + { > + if (DECL_UID (t1) != DECL_UID (t2)) > + return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; > + break; > + } > + > Why does this work while using iterative_hash_expr fail? Clearly I'm mis-understanding something. jeff
The difference is that the relative order of DECL_UIDs do not change whether debug info is on or not, but there is no such guarantee when hashing is involved. David On Thu, Jul 18, 2013 at 9:45 AM, Jeff Law <law@redhat.com> wrote: > On 07/12/2013 04:13 PM, Cong Hou wrote: >> >> GCC bootstrap failed with loop vectorizer turned on by default at O2. >> The symptom is that the comparison between stage2&3 compilers fails. >> The root cause is a bug in the file "tree-vect-data-refs.c", where a >> qsort() function call is used to sort a group of data references using >> a comparison function called "dr_group_sort_cmp()". In this function, >> the iterative hash values of tree nodes are used for comparisons. For >> a declaration tree node, its UID participates in the calculation of >> the hash value. However, a specific declaration may have different >> UIDs whether the debug information is switched on/off (-gtoggle). In >> consequence, the results of comparisons may vary in stage 2&3 during >> bootstrapping. >> >> The following patch fixed the bug. Compiler bootstraps and there is no >> regressions in regression test. Compiler also bootstraps fine when >> turning on vectorizer by default. Since this patch may produce >> difference result (but still correct) than before due to the >> modification to the comparison function, four test cases are adjusted >> accordingly. OK for trunk? > > If I understand you correctly, you're claiming that the DECL_UID can vary > based on whether or not we're emitting debug info. This can be seen in > iterative_hash_expr: > > > default: > tclass = TREE_CODE_CLASS (code); > > if (tclass == tcc_declaration) > { > /* DECL's have a unique ID */ > val = iterative_hash_host_wide_int (DECL_UID (t), val); > } > > > What doesn't make sense to me is your compare_tree looks at the UIDs just as > well: > > > > >> + >> + default: >> + tclass = TREE_CODE_CLASS (code); >> + >> + /* For var-decl, we could compare their UIDs. */ >> + if (tclass == tcc_declaration) >> + { >> + if (DECL_UID (t1) != DECL_UID (t2)) >> + return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; >> + break; >> + } >> + >> > > > Why does this work while using iterative_hash_expr fail? > > Clearly I'm mis-understanding something. > > jeff
On Thu, Jul 18, 2013 at 10:45:19AM -0600, Jeff Law wrote: > If I understand you correctly, you're claiming that the DECL_UID can > vary based on whether or not we're emitting debug info. This can be > seen in iterative_hash_expr: Yes, for e.g. SSA_NAME_VERSION, we require that it is the same in between -g -fvar-tracking-assignments and -g0, for DECL_UIDs, only the relative ordering must be preserved, as in, for VTA the gaps between DECL_UID of decls seen both in -g0 and -g code can be bigger. So -g0 can use for decls a, c, b uids say 12 13 14, while -g can use 12 27 39. Thus hashing the DECL_UID if e.g. the hash table is later on traversed could lead to -fcompare-debug failures. Jakub
On 07/18/2013 10:49 AM, Xinliang David Li wrote: > The difference is that the relative order of DECL_UIDs do not change > whether debug info is on or not, but there is no such guarantee when > hashing is involved. Yea. I feel like an idiot for missing the hash vs comparison. Add a ChangeLog and this is good to go. Jeff
We all missed it when just staring at the code :) David On Fri, Jul 19, 2013 at 5:45 AM, Jeff Law <law@redhat.com> wrote: > On 07/18/2013 10:49 AM, Xinliang David Li wrote: >> >> The difference is that the relative order of DECL_UIDs do not change >> whether debug info is on or not, but there is no such guarantee when >> hashing is involved. > > Yea. I feel like an idiot for missing the hash vs comparison. > > Add a ChangeLog and this is good to go. > > Jeff >
Index: gcc/testsuite/gcc.target/i386/l_fma_double_1.c =================================================================== --- gcc/testsuite/gcc.target/i386/l_fma_double_1.c (revision 200893) +++ gcc/testsuite/gcc.target/i386/l_fma_double_1.c (working copy) @@ -10,9 +10,9 @@ #include "l_fma_1.h" /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ +/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ +/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ Index: gcc/testsuite/gcc.target/i386/l_fma_float_1.c =================================================================== --- gcc/testsuite/gcc.target/i386/l_fma_float_1.c (revision 200893) +++ gcc/testsuite/gcc.target/i386/l_fma_float_1.c (working copy) @@ -9,9 +9,9 @@ #include "l_fma_1.h" /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ +/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ +/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ Index: gcc/testsuite/gcc.target/i386/l_fma_double_3.c =================================================================== --- gcc/testsuite/gcc.target/i386/l_fma_double_3.c (revision 200893) +++ gcc/testsuite/gcc.target/i386/l_fma_double_3.c (working copy) @@ -10,9 +10,9 @@ #include "l_fma_3.h" /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */ +/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */ +/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ Index: gcc/testsuite/gcc.target/i386/l_fma_float_3.c =================================================================== --- gcc/testsuite/gcc.target/i386/l_fma_float_3.c (revision 200893) +++ gcc/testsuite/gcc.target/i386/l_fma_float_3.c (working copy) @@ -9,9 +9,9 @@ #include "l_fma_3.h" /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */ +/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */ +/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c (revision 200893) +++ gcc/tree-vect-data-refs.c (working copy) @@ -2311,6 +2311,80 @@ return vect_analyze_group_access (dr); } + + +/* Compare two tree nodes. This function is used to compare two + data-references as below. */ + +static int +compare_tree (tree t1, tree t2) +{ + int i, cmp; + enum tree_code code; + char tclass; + + if (t1 == t2) + return 0; + if (t1 == NULL) + return -1; + if (t2 == NULL) + return 1; + + + if (TREE_CODE (t1) != TREE_CODE (t2)) + return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1; + + code = TREE_CODE (t1); + switch (code) + { + /* For const values, we can just use hash values for comparisons. */ + case INTEGER_CST: + case REAL_CST: + case FIXED_CST: + case STRING_CST: + case COMPLEX_CST: + case VECTOR_CST: + { + hashval_t h1 = iterative_hash_expr (t1, 0); + hashval_t h2 = iterative_hash_expr (t2, 0); + if (h1 != h2) + return h1 < h2 ? -1 : 1; + break; + } + + case SSA_NAME: + cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2)); + if (cmp != 0) + return cmp; + + if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2)) + return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1; + break; + + default: + tclass = TREE_CODE_CLASS (code); + + /* For var-decl, we could compare their UIDs. */ + if (tclass == tcc_declaration) + { + if (DECL_UID (t1) != DECL_UID (t2)) + return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1; + break; + } + + /* For expressions with operands, compare their operands recursively. */ + for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i) + { + cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i)); + if (cmp != 0) + return cmp; + } + } + + return 0; +} + + /* Compare two data-references DRA and DRB to group them into chunks suitable for grouping. */ @@ -2319,7 +2393,6 @@ { data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); - hashval_t h1, h2; int cmp; /* Stabilize sort. */ @@ -2329,19 +2402,17 @@ /* Ordering of DRs according to base. */ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)) { - h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0); - h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0); - if (h1 != h2) - return h1 < h2 ? -1 : 1; + cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb)); + if (cmp != 0) + return cmp; } /* And according to DR_OFFSET. */ if (!dr_equal_offsets_p (dra, drb)) { - h1 = iterative_hash_expr (DR_OFFSET (dra), 0); - h2 = iterative_hash_expr (DR_OFFSET (drb), 0); - if (h1 != h2) - return h1 < h2 ? -1 : 1; + cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); + if (cmp != 0) + return cmp; } /* Put reads before writes. */ @@ -2352,19 +2423,18 @@ if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0)) { - h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), 0); - h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0); - if (h1 != h2) - return h1 < h2 ? -1 : 1; + cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), + TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); + if (cmp != 0) + return cmp; } /* And after step. */ if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) { - h1 = iterative_hash_expr (DR_STEP (dra), 0); - h2 = iterative_hash_expr (DR_STEP (drb), 0); - if (h1 != h2) - return h1 < h2 ? -1 : 1; + cmp = compare_tree (DR_STEP (dra), DR_STEP (drb)); + if (cmp != 0) + return cmp; } /* Then sort after DR_INIT. In case of identical DRs sort after