diff mbox

Fixup INTEGER_CST

Message ID 20121007151519.GA16329@kam.mff.cuni.cz
State New
Headers show

Commit Message

Jan Hubicka Oct. 7, 2012, 3:15 p.m. UTC
Hi,
I added a santy check that after fixup all types that lost in the merging are
really dead.  And it turns out we have some zombies around.

INTEGER_CST needs special care because it is special cased by the streamer.  We also
do not want to do inplace modificaitons on it because that would corrupt the hashtable
used by tree.c's sharing code

Bootstrapped/regtested x86_64-linux, OK?


	PR lto/54839
	* lto/lto.c (remember_with_vars): Also fixup INTEGER_CST.
	(fixup_integer_cst): New functoin.
	(lto_ft_type): Fixup BASETYPE of methods and offsets.

Comments

Richard Biener Oct. 7, 2012, 4:20 p.m. UTC | #1
On Sun, Oct 7, 2012 at 5:15 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> I added a santy check that after fixup all types that lost in the merging are
> really dead.  And it turns out we have some zombies around.
>
> INTEGER_CST needs special care because it is special cased by the streamer.  We also
> do not want to do inplace modificaitons on it because that would corrupt the hashtable
> used by tree.c's sharing code
>
> Bootstrapped/regtested x86_64-linux, OK?

No, I don't think we want to fixup INTEGER_CSTs this way.  Instead we
want to fixup
them where they end up used unfixed.

Richard.

>
>         PR lto/54839
>         * lto/lto.c (remember_with_vars): Also fixup INTEGER_CST.
>         (fixup_integer_cst): New functoin.
>         (lto_ft_type): Fixup BASETYPE of methods and offsets.
> Index: lto/lto.c
> ===================================================================
> --- lto/lto.c   (revision 192164)
> +++ lto/lto.c   (working copy)
> @@ -1408,11 +1408,36 @@ remember_with_vars (tree t)
>             (tt) = GIMPLE_REGISTER_TYPE (tt); \
>           if (VAR_OR_FUNCTION_DECL_P (tt) && TREE_PUBLIC (tt)) \
>             remember_with_vars (t); \
> +         if (TREE_CODE (tt) == INTEGER_CST) \
> +           (tt) = fixup_integer_cst (tt); \
>         } \
>      } while (0)
>
>  static void lto_fixup_types (tree);
>
> +/* Return integer_cst T with updated type.  */
> +
> +static tree
> +fixup_integer_cst (tree t)
> +{
> +  tree type = GIMPLE_REGISTER_TYPE (TREE_TYPE (t));
> +
> +  if (type == TREE_TYPE (t))
> +    return t;
> +
> +  /* If overflow was set, streamer_read_integer_cst
> +     produced local copy of T. */
> +  if (TREE_OVERFLOW (t))
> +    {
> +      TREE_TYPE (t) = type;
> +      return t;
> +    }
> +  else
> +  /* Otherwise produce new shared node for the new type.  */
> +    return build_int_cst_wide (type, TREE_INT_CST_LOW (t),
> +                              TREE_INT_CST_HIGH (t));
> +}
> +
>  /* Fix up fields of a tree_typed T.  */
>
>  static void
> @@ -1526,6 +1549,11 @@ lto_ft_type (tree t)
>    LTO_FIXUP_TREE (t->type_non_common.binfo);
>
>    LTO_FIXUP_TREE (TYPE_CONTEXT (t));
> +
> +  if (TREE_CODE (t) == METHOD_TYPE)
> +    TYPE_METHOD_BASETYPE (t);
> +  if (TREE_CODE (t) == OFFSET_TYPE)
> +    TYPE_OFFSET_BASETYPE (t);
>  }
>
>  /* Fix up fields of a BINFO T.  */
Jan Hubicka Oct. 7, 2012, 5:22 p.m. UTC | #2
> On Sun, Oct 7, 2012 at 5:15 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> > Hi,
> > I added a santy check that after fixup all types that lost in the merging are
> > really dead.  And it turns out we have some zombies around.
> >
> > INTEGER_CST needs special care because it is special cased by the streamer.  We also
> > do not want to do inplace modificaitons on it because that would corrupt the hashtable
> > used by tree.c's sharing code
> >
> > Bootstrapped/regtested x86_64-linux, OK?
> 
> No, I don't think we want to fixup INTEGER_CSTs this way.  Instead we
> want to fixup
> them where they end up used unfixed.

Erm, I think it is what the patch does?
It replaces pointers to integer_cst with type that did not survive by pointer
to new integer_cst. (with the optimization that INTEGER_CST with overflow
is changed in place because it is allowed to do so).

Honza
> 
> Richard.
> 
> >
> >         PR lto/54839
> >         * lto/lto.c (remember_with_vars): Also fixup INTEGER_CST.
> >         (fixup_integer_cst): New functoin.
> >         (lto_ft_type): Fixup BASETYPE of methods and offsets.
> > Index: lto/lto.c
> > ===================================================================
> > --- lto/lto.c   (revision 192164)
> > +++ lto/lto.c   (working copy)
> > @@ -1408,11 +1408,36 @@ remember_with_vars (tree t)
> >             (tt) = GIMPLE_REGISTER_TYPE (tt); \
> >           if (VAR_OR_FUNCTION_DECL_P (tt) && TREE_PUBLIC (tt)) \
> >             remember_with_vars (t); \
> > +         if (TREE_CODE (tt) == INTEGER_CST) \
> > +           (tt) = fixup_integer_cst (tt); \
> >         } \
> >      } while (0)
> >
> >  static void lto_fixup_types (tree);
> >
> > +/* Return integer_cst T with updated type.  */
> > +
> > +static tree
> > +fixup_integer_cst (tree t)
> > +{
> > +  tree type = GIMPLE_REGISTER_TYPE (TREE_TYPE (t));
> > +
> > +  if (type == TREE_TYPE (t))
> > +    return t;
> > +
> > +  /* If overflow was set, streamer_read_integer_cst
> > +     produced local copy of T. */
> > +  if (TREE_OVERFLOW (t))
> > +    {
> > +      TREE_TYPE (t) = type;
> > +      return t;
> > +    }
> > +  else
> > +  /* Otherwise produce new shared node for the new type.  */
> > +    return build_int_cst_wide (type, TREE_INT_CST_LOW (t),
> > +                              TREE_INT_CST_HIGH (t));
> > +}
> > +
> >  /* Fix up fields of a tree_typed T.  */
> >
> >  static void
> > @@ -1526,6 +1549,11 @@ lto_ft_type (tree t)
> >    LTO_FIXUP_TREE (t->type_non_common.binfo);
> >
> >    LTO_FIXUP_TREE (TYPE_CONTEXT (t));
> > +
> > +  if (TREE_CODE (t) == METHOD_TYPE)
> > +    TYPE_METHOD_BASETYPE (t);
> > +  if (TREE_CODE (t) == OFFSET_TYPE)
> > +    TYPE_OFFSET_BASETYPE (t);
> >  }
> >
> >  /* Fix up fields of a BINFO T.  */
Richard Biener Oct. 8, 2012, 7:15 a.m. UTC | #3
On Sun, Oct 7, 2012 at 7:22 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> On Sun, Oct 7, 2012 at 5:15 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> > Hi,
>> > I added a santy check that after fixup all types that lost in the merging are
>> > really dead.  And it turns out we have some zombies around.
>> >
>> > INTEGER_CST needs special care because it is special cased by the streamer.  We also
>> > do not want to do inplace modificaitons on it because that would corrupt the hashtable
>> > used by tree.c's sharing code
>> >
>> > Bootstrapped/regtested x86_64-linux, OK?
>>
>> No, I don't think we want to fixup INTEGER_CSTs this way.  Instead we
>> want to fixup
>> them where they end up used unfixed.
>
> Erm, I think it is what the patch does?

Ah, indeed.

> It replaces pointers to integer_cst with type that did not survive by pointer
> to new integer_cst. (with the optimization that INTEGER_CST with overflow
> is changed in place because it is allowed to do so).

Btw ...

>> > @@ -1526,6 +1549,11 @@ lto_ft_type (tree t)
>> >    LTO_FIXUP_TREE (t->type_non_common.binfo);
>> >
>> >    LTO_FIXUP_TREE (TYPE_CONTEXT (t));
>> > +
>> > +  if (TREE_CODE (t) == METHOD_TYPE)
>> > +    TYPE_METHOD_BASETYPE (t);
>> > +  if (TREE_CODE (t) == OFFSET_TYPE)
>> > +    TYPE_OFFSET_BASETYPE (t);

that looks like a no-op to me ... (both are TYPE_MAXVAL which
is already fixed up).

Thus, ok with removing the above hunk.

Thanks,
Richard.

>> >  }
>> >
>> >  /* Fix up fields of a BINFO T.  */
Jan Hubicka Oct. 8, 2012, 9:18 a.m. UTC | #4
> On Sun, Oct 7, 2012 at 7:22 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> >> On Sun, Oct 7, 2012 at 5:15 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> >> > Hi,
> >> > I added a santy check that after fixup all types that lost in the merging are
> >> > really dead.  And it turns out we have some zombies around.
> >> >
> >> > INTEGER_CST needs special care because it is special cased by the streamer.  We also
> >> > do not want to do inplace modificaitons on it because that would corrupt the hashtable
> >> > used by tree.c's sharing code
> >> >
> >> > Bootstrapped/regtested x86_64-linux, OK?
> >>
> >> No, I don't think we want to fixup INTEGER_CSTs this way.  Instead we
> >> want to fixup
> >> them where they end up used unfixed.
> >
> > Erm, I think it is what the patch does?
> 
> Ah, indeed.
> 
> > It replaces pointers to integer_cst with type that did not survive by pointer
> > to new integer_cst. (with the optimization that INTEGER_CST with overflow
> > is changed in place because it is allowed to do so).
> 
> Btw ...
> 
> >> > @@ -1526,6 +1549,11 @@ lto_ft_type (tree t)
> >> >    LTO_FIXUP_TREE (t->type_non_common.binfo);
> >> >
> >> >    LTO_FIXUP_TREE (TYPE_CONTEXT (t));
> >> > +
> >> > +  if (TREE_CODE (t) == METHOD_TYPE)
> >> > +    TYPE_METHOD_BASETYPE (t);
> >> > +  if (TREE_CODE (t) == OFFSET_TYPE)
> >> > +    TYPE_OFFSET_BASETYPE (t);
> 
> that looks like a no-op to me ... (both are TYPE_MAXVAL which
> is already fixed up).

Ah, indeed.  They were result of experimenting with the stale pointers to the
obsoletted types and field decls.  I now understand where they come from.  The
reason is twofold.

  1) after merging records we replace field decls in the cache
     by new ones.  This however does not mean that they die, because
     the existing pointers to them are not replaced.
     I have WIP patch for that that however require one extra pass
     over the list of all trees.
  2) As we query the type_hash while we are rewritting the types,
     we run into instability of the hashtable. This manifests itself
     as an ICE when one adds sanity check that while merging function
     types their arg types are equivalent, too.
     This ICEs compiling i.e. sqlite but I did not really managed to
     reduce this.  I tracked it down to the argument type being inserted
     into gimple_type_hash but at the time we query the new argument type,
     the original is no longer found despite their hashes are equivalent.
     The problem is hidden when things fit into the leader cache,
     so one needs rather big testcase.

So I tried to register all gimple types first.  Use TREE_VISITED within
the merging code to mark that type is not a leader and then TREE_CHAIN 
to point to the leader.  This avoids need to re-query the hashtable
from the later fixups.  We only look for types with TREEE_VISITED
and replace them by TREE_CHAIN.
This has two passes.  First we compute the main variants and mark
field_decls and type_decls for merging and in last pass we finally do
fixup on what remained in the table.

This allows me to poison pointers in the removed types in a way
so the GGC would ICE if they stayed reachable.
I however need the extra pass because
 1) I can not update the type_decls/field_decls while registering
    types or I run into the hash table problems
 2) I can not merge the second two passes because at the time
    I find type/field decls equialent there may be earlier pointers
    to them.

Honza
Richard Biener Oct. 8, 2012, 9:38 a.m. UTC | #5
On Mon, Oct 8, 2012 at 11:18 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> On Sun, Oct 7, 2012 at 7:22 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> >> On Sun, Oct 7, 2012 at 5:15 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> >> > Hi,
>> >> > I added a santy check that after fixup all types that lost in the merging are
>> >> > really dead.  And it turns out we have some zombies around.
>> >> >
>> >> > INTEGER_CST needs special care because it is special cased by the streamer.  We also
>> >> > do not want to do inplace modificaitons on it because that would corrupt the hashtable
>> >> > used by tree.c's sharing code
>> >> >
>> >> > Bootstrapped/regtested x86_64-linux, OK?
>> >>
>> >> No, I don't think we want to fixup INTEGER_CSTs this way.  Instead we
>> >> want to fixup
>> >> them where they end up used unfixed.
>> >
>> > Erm, I think it is what the patch does?
>>
>> Ah, indeed.
>>
>> > It replaces pointers to integer_cst with type that did not survive by pointer
>> > to new integer_cst. (with the optimization that INTEGER_CST with overflow
>> > is changed in place because it is allowed to do so).
>>
>> Btw ...
>>
>> >> > @@ -1526,6 +1549,11 @@ lto_ft_type (tree t)
>> >> >    LTO_FIXUP_TREE (t->type_non_common.binfo);
>> >> >
>> >> >    LTO_FIXUP_TREE (TYPE_CONTEXT (t));
>> >> > +
>> >> > +  if (TREE_CODE (t) == METHOD_TYPE)
>> >> > +    TYPE_METHOD_BASETYPE (t);
>> >> > +  if (TREE_CODE (t) == OFFSET_TYPE)
>> >> > +    TYPE_OFFSET_BASETYPE (t);
>>
>> that looks like a no-op to me ... (both are TYPE_MAXVAL which
>> is already fixed up).
>
> Ah, indeed.  They were result of experimenting with the stale pointers to the
> obsoletted types and field decls.  I now understand where they come from.  The
> reason is twofold.
>
>   1) after merging records we replace field decls in the cache
>      by new ones.  This however does not mean that they die, because
>      the existing pointers to them are not replaced.
>      I have WIP patch for that that however require one extra pass
>      over the list of all trees.

Yes, I think this is also why we do

                      /* ???  Not sure the above is all relevant in this
                         path canonicalizing TYPE_FIELDS to that of the
                         main variant.  */
                      if (ix < i)
                        lto_fixup_types (f2);
                      streamer_tree_cache_insert_at (cache, f1, ix);

something I dislike as well and something we should try to address in a
more formal way.

>   2) As we query the type_hash while we are rewritting the types,
>      we run into instability of the hashtable. This manifests itself
>      as an ICE when one adds sanity check that while merging function
>      types their arg types are equivalent, too.
>      This ICEs compiling i.e. sqlite but I did not really managed to
>      reduce this.  I tracked it down to the argument type being inserted
>      into gimple_type_hash but at the time we query the new argument type,
>      the original is no longer found despite their hashes are equivalent.
>      The problem is hidden when things fit into the leader cache,
>      so one needs rather big testcase.

Ugh.  For reduction you can disable those caches though.  The above
means there is a disconnect between hashing and comparing.
Maybe it's something weird with the early out

      if (TYPE_ARG_TYPES (t1) == TYPE_ARG_TYPES (t2))
        goto same_types;
?

> So I tried to register all gimple types first.  Use TREE_VISITED within
> the merging code to mark that type is not a leader and then TREE_CHAIN
> to point to the leader.  This avoids need to re-query the hashtable
> from the later fixups.  We only look for types with TREEE_VISITED
> and replace them by TREE_CHAIN.

TREE_CHAIN is unused for types?  But we probably shouldn't add a new
use ...

> This has two passes.  First we compute the main variants and mark
> field_decls and type_decls for merging and in last pass we finally do
> fixup on what remained in the table.
>
> This allows me to poison pointers in the removed types in a way
> so the GGC would ICE if they stayed reachable.
> I however need the extra pass because
>  1) I can not update the type_decls/field_decls while registering
>     types or I run into the hash table problems
>  2) I can not merge the second two passes because at the time
>     I find type/field decls equialent there may be earlier pointers
>     to them.

You need to "merge" all trees reachable from the one you start at once
(what I'm working on from time to time - work per tree "SCC", in a DFS
walk).

Richard.

> Honza
Jan Hubicka Oct. 8, 2012, 9:45 a.m. UTC | #6
> >   2) As we query the type_hash while we are rewritting the types,
> >      we run into instability of the hashtable. This manifests itself
> >      as an ICE when one adds sanity check that while merging function
> >      types their arg types are equivalent, too.
> >      This ICEs compiling i.e. sqlite but I did not really managed to
> >      reduce this.  I tracked it down to the argument type being inserted
> >      into gimple_type_hash but at the time we query the new argument type,
> >      the original is no longer found despite their hashes are equivalent.
> >      The problem is hidden when things fit into the leader cache,
> >      so one needs rather big testcase.
> 
> Ugh.  For reduction you can disable those caches though.  The above
> means there is a disconnect between hashing and comparing.
> Maybe it's something weird with the early out
> 
>       if (TYPE_ARG_TYPES (t1) == TYPE_ARG_TYPES (t2))
>         goto same_types;
> ?

Well, the problem goes away when you process all types before changing, so I
think it really is instability of hash table computation. But I am not sure how
to test for it.
Even disabling the caching and recomputing after gimple_register_type leads
to different results.
> 
> > So I tried to register all gimple types first.  Use TREE_VISITED within
> > the merging code to mark that type is not a leader and then TREE_CHAIN
> > to point to the leader.  This avoids need to re-query the hashtable
> > from the later fixups.  We only look for types with TREEE_VISITED
> > and replace them by TREE_CHAIN.
> 
> TREE_CHAIN is unused for types?  But we probably shouldn't add a new
> use ...

It is used, but unused for type merging.  
 /* Nodes are chained together for many purposes.
   Types are chained together to record them for being output to the debugger
   (see the function `chain_type'). */

We know that types that lost merging will not be used later, so we can
overwrite pointers we don't need.

When one removes the type from variant list during registering, one can
also use TYPE_MAIN_VARIANT, for example.
> 
> > This has two passes.  First we compute the main variants and mark
> > field_decls and type_decls for merging and in last pass we finally do
> > fixup on what remained in the table.
> >
> > This allows me to poison pointers in the removed types in a way
> > so the GGC would ICE if they stayed reachable.
> > I however need the extra pass because
> >  1) I can not update the type_decls/field_decls while registering
> >     types or I run into the hash table problems
> >  2) I can not merge the second two passes because at the time
> >     I find type/field decls equialent there may be earlier pointers
> >     to them.
> 
> You need to "merge" all trees reachable from the one you start at once
> (what I'm working on from time to time - work per tree "SCC", in a DFS
> walk).

Yep, doing things per-SCC is definitely good idea. 

It will also give a chance to improve the hash itself.  If you process in SCC
order you know that all references outside SCC have already leaders set and you
can hash their addresses rather than using the weak hash.

I would really love to see this done.  After updating Mozilla we now need 10GB
of RAM and about 18 minutes for merging (they merged in new JIT that aparently
plays badly with our types). This makes any development/testing difficult.

Honza
Jan Hubicka Oct. 8, 2012, 2:19 p.m. UTC | #7
> >   2) As we query the type_hash while we are rewritting the types,
> >      we run into instability of the hashtable. This manifests itself
> >      as an ICE when one adds sanity check that while merging function
> >      types their arg types are equivalent, too.
> >      This ICEs compiling i.e. sqlite but I did not really managed to
> >      reduce this.  I tracked it down to the argument type being inserted
> >      into gimple_type_hash but at the time we query the new argument type,
> >      the original is no longer found despite their hashes are equivalent.
> >      The problem is hidden when things fit into the leader cache,
> >      so one needs rather big testcase.
> 
> Ugh.  For reduction you can disable those caches though.  The above
> means there is a disconnect between hashing and comparing.
> Maybe it's something weird with the early out
> 
>       if (TYPE_ARG_TYPES (t1) == TYPE_ARG_TYPES (t2))
>         goto same_types;
> ?

I filled in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54856 sadly the
testcase I reduced with yesterday tree do not reproduce on today tree on
different machine.  Perhaps it is hash table conflict with GGC or something
like that.

sqlite seems big enough to trigger the bug quite reproducibly. On current
mainline I however need to disable leader cache (that was not true on weekend
on the other machine ;)

Honza
diff mbox

Patch

Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 192164)
+++ lto/lto.c	(working copy)
@@ -1408,11 +1408,36 @@  remember_with_vars (tree t)
 	    (tt) = GIMPLE_REGISTER_TYPE (tt); \
 	  if (VAR_OR_FUNCTION_DECL_P (tt) && TREE_PUBLIC (tt)) \
 	    remember_with_vars (t); \
+	  if (TREE_CODE (tt) == INTEGER_CST) \
+	    (tt) = fixup_integer_cst (tt); \
 	} \
     } while (0)
 
 static void lto_fixup_types (tree);
 
+/* Return integer_cst T with updated type.  */
+
+static tree
+fixup_integer_cst (tree t)
+{
+  tree type = GIMPLE_REGISTER_TYPE (TREE_TYPE (t));
+
+  if (type == TREE_TYPE (t))
+    return t;
+
+  /* If overflow was set, streamer_read_integer_cst
+     produced local copy of T. */
+  if (TREE_OVERFLOW (t))
+    {
+      TREE_TYPE (t) = type;
+      return t;
+    }
+  else
+  /* Otherwise produce new shared node for the new type.  */
+    return build_int_cst_wide (type, TREE_INT_CST_LOW (t),
+			       TREE_INT_CST_HIGH (t));
+}
+
 /* Fix up fields of a tree_typed T.  */
 
 static void
@@ -1526,6 +1549,11 @@  lto_ft_type (tree t)
   LTO_FIXUP_TREE (t->type_non_common.binfo);
 
   LTO_FIXUP_TREE (TYPE_CONTEXT (t));
+
+  if (TREE_CODE (t) == METHOD_TYPE)
+    TYPE_METHOD_BASETYPE (t);
+  if (TREE_CODE (t) == OFFSET_TYPE)
+    TYPE_OFFSET_BASETYPE (t);
 }
 
 /* Fix up fields of a BINFO T.  */